In [1]:
import json
import pandas as pd
import numpy as np
import orga_functions as org

In [2]:
from X1_DataPreparation import change_column
from X1_DataPreparation import og_column_list
from X1_DataPreparation import og_int_column_list

In [3]:
# Read the per-feature summary statistics (mean, quartiles, min, max) from a
# semicolon-separated CSV. The min/max columns later bound the generated values.
# NOTE(review): org.path presumably resolves the file name inside the project's
# data folder — confirm in orga_functions.

path = org.path("02_AlleFeatureList.csv")
feature_df = pd.read_csv(path, sep =";")

## FeatureList DataPreparation

In [4]:
# Inspect the loaded statistics as read from the CSV file.
feature_df

Unnamed: 0,names,mean,25%,50%,75%,min,max
0,co_gt,2.168604,1.1,1.8,2.9,0.1,5.6
1,pt08_s1_co,1098.303412,932.0,1061.0,1234.0,647.0,1687.0
2,nmhc_gt,218.607666,66.0,145.0,297.0,0.1,643.5
3,c6h6_gt,10.458205,4.7,8.6,14.4,0.1,28.95
4,pt08_s2_nmhc,953.579453,749.0,925.0,1130.0,749.0,1701.5
5,nox_gt,232.360072,89.0,164.0,303.75,0.1,625.875
6,pt08_s3_nox,848.762615,672.0,818.0,984.0,204.0,1452.0
7,no2_gt,106.251831,73.0,103.0,132.0,0.1,220.5
8,pt08_s4_no2,1507.352834,1305.0,1508.0,1707.0,702.0,2310.0
9,pt08_s5_o3,1024.200026,737.0,962.0,1272.0,253.0,2074.5


#### Original Names

In [5]:
# Column names returned by og_column_list() (imported from X1_DataPreparation);
# og_name() compares change_column() of each of them against the names stored
# in feature_df.
original_names = og_column_list()

In [6]:
# Replace names by original/raw names
def og_name(name):
    """Return the original column name whose change_column() form equals ``name``.

    ``original_names`` is scanned in order and the first match wins. When no
    original name maps to ``name``, the argument is returned unchanged.
    """
    matches = (raw for raw in original_names if change_column(raw) == name)
    return next(matches, name)

In [7]:
# Translate every entry of the "names" column back to its original column name.
feature_df["names"] = feature_df["names"].map(og_name)

In [8]:
# Check the result of the rename applied to the "names" column.
feature_df

Unnamed: 0,names,mean,25%,50%,75%,min,max
0,CO(GT),2.168604,1.1,1.8,2.9,0.1,5.6
1,PT08.S1(CO),1098.303412,932.0,1061.0,1234.0,647.0,1687.0
2,NMHC(GT),218.607666,66.0,145.0,297.0,0.1,643.5
3,C6H6(GT),10.458205,4.7,8.6,14.4,0.1,28.95
4,PT08.S2(NMHC),953.579453,749.0,925.0,1130.0,749.0,1701.5
5,NOx(GT),232.360072,89.0,164.0,303.75,0.1,625.875
6,PT08.S3(NOx),848.762615,672.0,818.0,984.0,204.0,1452.0
7,NO2(GT),106.251831,73.0,103.0,132.0,0.1,220.5
8,PT08.S4(NO2),1507.352834,1305.0,1508.0,1707.0,702.0,2310.0
9,PT08.S5(O3),1024.200026,737.0,962.0,1272.0,253.0,2074.5


#### Names as Index

In [9]:
# Index the statistics by column name so that rows can be looked up with .loc.
# The guard keeps this cell safe to re-run: set_index consumes the "names"
# column, so an unguarded second run would raise KeyError.
if feature_df.index.name != 'names':
    feature_df = feature_df.set_index('names')

In [10]:
# Preview the two columns that get_min() and get_max() read.
feature_df[["min", "max"]]

Unnamed: 0_level_0,min,max
names,Unnamed: 1_level_1,Unnamed: 2_level_1
CO(GT),0.1,5.6
PT08.S1(CO),647.0,1687.0
NMHC(GT),0.1,643.5
C6H6(GT),0.1,28.95
PT08.S2(NMHC),749.0,1701.5
NOx(GT),0.1,625.875
PT08.S3(NOx),204.0,1452.0
NO2(GT),0.1,220.5
PT08.S4(NO2),702.0,2310.0
PT08.S5(O3),253.0,2074.5


# Create new entries

### Functions

In [11]:
def get_min(name):
    """Return the ``min`` statistic stored in ``feature_df`` for column ``name``."""
    # One (row, column) lookup instead of chaining .loc[name]["min"], which
    # would first materialise the whole row as a Series.
    return feature_df.loc[name, "min"]

def get_max(name):
    """Return the ``max`` statistic stored in ``feature_df`` for column ``name``."""
    # Same single label-based lookup as in get_min().
    return feature_df.loc[name, "max"]
                         

In [12]:
# Columns for which the get_* helpers below draw integers instead of floats;
# the list is provided by og_int_column_list() from X1_DataPreparation.
int_columns = og_int_column_list()

In [13]:
def get_correct(name):
    """Draw a random value inside the [min, max] range stored for ``name``.

    Columns listed in ``int_columns`` get an integer; every other column gets
    a float rounded to two decimals. Both bounds are possible results.

    The draw uses NumPy's global random state. No seed is set in the visible
    cells, so repeated runs produce different values.

    Raises:
        ValueError: for an integer column whose range contains no integer.
    """
    lower, upper = get_min(name), get_max(name)
    if name in int_columns:
        # randint() converts float bounds with int() (truncation towards zero)
        # and excludes the upper bound. Passing the raw bounds could therefore
        # return 0 for a minimum of 0.1 and would never return the maximum.
        # Round the bounds inwards and add one so that exactly the integers
        # inside [lower, upper] can be drawn.
        first = int(np.ceil(lower))
        last = int(np.floor(upper))
        return np.random.randint(first, last + 1)
    value = round(np.random.uniform(lower, upper), 2)
    # Rounding can step just outside bounds that carry more than two decimals,
    # so clamp the result back into the range.
    return float(min(max(value, lower), upper))

In [14]:
def get_too_small(name):
    """Draw a random value below the minimum stored for ``name``.

    The value is taken from the range -250 .. (minimum - 1): an integer for
    columns in ``int_columns``, otherwise a float rounded to two decimals.
    """
    wants_int = name in int_columns
    upper = get_min(name) - 1
    if not wants_int:
        return round(np.random.uniform(-250, upper), 2)
    return np.random.randint(-250, upper)

In [15]:
def get_too_big(name):
    """Draw a random value above the maximum stored for ``name``.

    The value is taken from the range (maximum + 1) .. 5000: an integer for
    columns in ``int_columns``, otherwise a float rounded to two decimals.
    """
    wants_int = name in int_columns
    lower = get_max(name) + 1
    if not wants_int:
        return round(np.random.uniform(lower, 5000), 2)
    return np.random.randint(lower, 5000)

## Entry with only correct values

In [16]:
# One in-range value for every column listed in the feature index.
corr_dict = {column: get_correct(column) for column in feature_df.index.values}

In [17]:
# Date and time fields of this entry.
corr_dict.update({"Date": "07/02/2005", "Time": '00.00.00'})

## Entry with some too small values

In [18]:
# Alternate by 1-based position: odd positions get a value below the minimum,
# even positions a value inside the range.
small_dict = {}
for position, column in enumerate(feature_df.index.values, start=1):
    draw = get_correct if position % 2 == 0 else get_too_small
    small_dict[column] = draw(column)

In [19]:
# Date and time fields of this entry.
small_dict.update({"Date": "07/02/2005", "Time": '01.00.00'})

## Entry with some too big values

In [20]:
# Alternate by 1-based position: odd positions get a value above the maximum,
# even positions a value inside the range.
big_dict = {}
for position, column in enumerate(feature_df.index.values, start=1):
    draw = get_correct if position % 2 == 0 else get_too_big
    big_dict[column] = draw(column)

In [21]:
# Date and time fields of this entry.
big_dict.update({"Date": "07/02/2005", "Time": '02.00.00'})

## Entry with mixed values

In [22]:
# Pick the generator by 1-based position: every third column gets a too-big
# value, the remaining even positions a too-small value, all others a value
# inside the range.
mix_dict = {}
for position, column in enumerate(feature_df.index.values, start=1):
    if position % 3 == 0:        # 3, 6, 9, 12
        draw = get_too_big
    elif position % 2 == 0:      # 2, 4, 8, 10
        draw = get_too_small
    else:                        # 1, 5, 7, 11, 13
        draw = get_correct
    mix_dict[column] = draw(column)

In [23]:
# Date and time fields of this entry.
mix_dict.update({"Date": "07/02/2005", "Time": '03.00.00'})

## Entry with missing values

In [24]:
# Entry that will carry only a subset of the columns; the following cells fill it.
missing_dict = {}

In [25]:
# Date and time fields of this entry.
missing_dict.update({"Date": "07/02/2005", "Time": '04.00.00'})

In [26]:
# Only these three columns receive an in-range value; the others stay absent.
for column in ("CO(GT)", "PT08.S1(CO)", "NO2(GT)"):
    missing_dict[column] = get_correct(column)

In [27]:
# Explicit missing value for one column.
# NOTE(review): json.dump() writes np.nan as the bare token NaN, which strict
# JSON parsers reject — confirm that the consumer of 05_LiveData.json accepts it.
missing_dict["NOx(GT)"] = np.nan

## Entry with unknown columns

In [28]:
# In-range values for every column, plus one key ("unknown") that is not part
# of the feature index, followed by the date and time fields.
dict_unknown = {column: get_correct(column) for column in feature_df.index.values}
dict_unknown.update({
    "unknown": "low",
    "Date": "07/02/2005",
    "Time": '05.00.00',
})

# More entries

In [29]:
# Second all-correct entry, one hour after dict_unknown.
corr_dict2 = {column: get_correct(column) for column in feature_df.index.values}

corr_dict2.update({"Date": "07/02/2005", "Time": '06.00.00'})

In [30]:
# Third all-correct entry, one hour after corr_dict2.
corr_dict3 = {column: get_correct(column) for column in feature_df.index.values}

corr_dict3.update({"Date": "07/02/2005", "Time": '07.00.00'})

# Save LiveData in JSON files

In [31]:
#function
def entry_list(*entries):
    """Collect the given entries into a new list, preserving their order.

    The entries themselves are not copied; the returned list references the
    same objects that were passed in. Calling it without arguments returns an
    empty list.
    """
    # list() on the argument tuple avoids a local variable named ``list``,
    # which would shadow the built-in inside this function.
    return list(entries)

## Save

In [32]:
# Wrap the all-correct entry in a list so it is serialised as a JSON array.
first_entry = entry_list(corr_dict)

In [33]:
# Wrap the entry with too-small values in a list (serialised as a JSON array).
second_entry = entry_list(small_dict)

In [34]:
# Wrap the entry with too-big values in a list (serialised as a JSON array).
third_entry = entry_list(big_dict)

In [35]:
# Wrap the mixed entry in a list (serialised as a JSON array).
fourth_entry = entry_list(mix_dict)

In [36]:
# Wrap the entry with missing values in a list (serialised as a JSON array).
fifth_entry = entry_list(missing_dict)

In [37]:
# Wrap the entry with the unknown column in a list (serialised as a JSON array).
sixth_entry = entry_list(dict_unknown)

In [38]:
# Two further all-correct entries combined into one list, in chronological order.
other_entries = entry_list(corr_dict2, corr_dict3)

In [39]:
# Write the all-correct entry to 01_LiveData.json, indented by two spaces.
new_path = org.path("01_LiveData.json")

with open(new_path, mode='w') as json_file:
    json.dump(first_entry, json_file, indent=2)

In [40]:
# Write the entry with too-small values to 02_LiveData.json, indented by two spaces.
new_path = org.path("02_LiveData.json")

with open(new_path, mode='w') as json_file:
    json.dump(second_entry, json_file, indent=2)

In [41]:
# Write the entry with too-big values to 03_LiveData.json, indented by two spaces.
new_path = org.path("03_LiveData.json")

with open(new_path, mode='w') as json_file:
    json.dump(third_entry, json_file, indent=2)

In [42]:
# Write the mixed entry to 04_LiveData.json, indented by two spaces.
new_path = org.path("04_LiveData.json")

with open(new_path, mode='w') as json_file:
    json.dump(fourth_entry, json_file, indent=2)

In [43]:
# Write the entry with missing values to 05_LiveData.json, indented by two spaces.
new_path = org.path("05_LiveData.json")

with open(new_path, mode='w') as json_file:
    json.dump(fifth_entry, json_file, indent=2)

In [45]:
# Write the entry with the unknown column to 06_LiveData.json, indented by two spaces.
new_path = org.path("06_LiveData.json")

with open(new_path, mode='w') as json_file:
    json.dump(sixth_entry, json_file, indent=2)

In [46]:
# Write the two additional correct entries to 07_LiveData.json, indented by two spaces.
new_path = org.path("07_LiveData.json")

with open(new_path, mode='w') as json_file:
    json.dump(other_entries, json_file, indent=2)

### Output

In [47]:
# Print first_entry as JSON text, using the same indent as the json.dump call
# that writes 01_LiveData.json.
print(json.dumps(first_entry, indent = 2))

[
  {
    "CO(GT)": 2.57,
    "PT08.S1(CO)": 978,
    "NMHC(GT)": 54,
    "C6H6(GT)": 22.99,
    "PT08.S2(NMHC)": 1043,
    "NOx(GT)": 126,
    "PT08.S3(NOx)": 358,
    "NO2(GT)": 130,
    "PT08.S4(NO2)": 2055,
    "PT08.S5(O3)": 337,
    "T": 14.81,
    "RH": 41.4,
    "AH": 1.53,
    "Date": "07/02/2005",
    "Time": "00.00.00"
  }
]


In [48]:
# Print other_entries as JSON text, using the same indent as the json.dump call
# that writes 07_LiveData.json.
print(json.dumps(other_entries, indent = 2))

[
  {
    "CO(GT)": 2.4,
    "PT08.S1(CO)": 1662,
    "NMHC(GT)": 275,
    "C6H6(GT)": 26.17,
    "PT08.S2(NMHC)": 842,
    "NOx(GT)": 525,
    "PT08.S3(NOx)": 293,
    "NO2(GT)": 146,
    "PT08.S4(NO2)": 2309,
    "PT08.S5(O3)": 992,
    "T": 24.34,
    "RH": 36.48,
    "AH": 1.74,
    "Date": "07/02/2005",
    "Time": "06.00.00"
  },
  {
    "CO(GT)": 5.13,
    "PT08.S1(CO)": 895,
    "NMHC(GT)": 159,
    "C6H6(GT)": 14.67,
    "PT08.S2(NMHC)": 1128,
    "NOx(GT)": 599,
    "PT08.S3(NOx)": 921,
    "NO2(GT)": 34,
    "PT08.S4(NO2)": 847,
    "PT08.S5(O3)": 1341,
    "T": 8.51,
    "RH": 13.68,
    "AH": 0.71,
    "Date": "07/02/2005",
    "Time": "07.00.00"
  }
]
