In [2]:
import json
import pandas as pd
import numpy as np
import orga_functions as org
import functions as func

In [5]:
# read in the dataframe
path = org.path("00_FeatureList_original.csv")
#path = org.path("02_PlausFeatureList.csv")
feature_df = pd.read_csv(path, sep =";")

## FeatureList DataPreparation

In [6]:
feature_df

Unnamed: 0,names,mean,25%,50%,75%,min,max
0,co_gt,2.168604,1.1,1.8,2.9,0.1,5.6
1,pt08_s1_co,1098.303412,932.0,1061.0,1234.0,647.0,1687.0
2,c6h6_gt,10.458205,4.7,8.6,14.4,0.1,28.95
3,pt08_s2_nmhc,953.579453,749.0,925.0,1130.0,749.0,1701.5
4,nox_gt,232.360072,89.0,164.0,303.75,0.1,625.875
5,pt08_s3_nox,848.762615,672.0,818.0,984.0,204.0,1452.0
6,no2_gt,106.251831,73.0,103.0,132.0,0.1,220.5
7,pt08_s4_no2,1507.352834,1305.0,1508.0,1707.0,702.0,2310.0
8,pt08_s5_o3,1024.200026,737.0,962.0,1272.0,253.0,2074.5
9,t,19.476041,13.1,19.3,25.4,0.3,43.85


#### Original Names

In [8]:
# Original Dataframe
original_names = pd.read_csv(org.path("00_AirQuality_original.csv"), sep=';', nrows=1).columns.tolist()
del original_names[15:]

In [9]:
# Replace names by original/raw names
def og_name(name):
    for og in original_names:
        if func.change_column(og) == name:
            return og
    return name

In [10]:
feature_df.names = [og_name(x) for x in feature_df.names]

In [11]:
feature_df

Unnamed: 0,names,mean,25%,50%,75%,min,max
0,CO(GT),2.168604,1.1,1.8,2.9,0.1,5.6
1,PT08.S1(CO),1098.303412,932.0,1061.0,1234.0,647.0,1687.0
2,C6H6(GT),10.458205,4.7,8.6,14.4,0.1,28.95
3,PT08.S2(NMHC),953.579453,749.0,925.0,1130.0,749.0,1701.5
4,NOx(GT),232.360072,89.0,164.0,303.75,0.1,625.875
5,PT08.S3(NOx),848.762615,672.0,818.0,984.0,204.0,1452.0
6,NO2(GT),106.251831,73.0,103.0,132.0,0.1,220.5
7,PT08.S4(NO2),1507.352834,1305.0,1508.0,1707.0,702.0,2310.0
8,PT08.S5(O3),1024.200026,737.0,962.0,1272.0,253.0,2074.5
9,T,19.476041,13.1,19.3,25.4,0.3,43.85


#### Names as Index

In [12]:
feature_df.set_index('names', inplace = True)

In [13]:
feature_df.drop(index = ["month", "hour"], inplace = True)

KeyError: "['month' 'hour'] not found in axis"

In [10]:
feature_df[["min", "max"]]

Unnamed: 0_level_0,min,max
names,Unnamed: 1_level_1,Unnamed: 2_level_1
CO(GT),0.1,11.9
PT08.S1(CO),647.0,2040.0
NMHC(GT),7.0,1189.0
C6H6(GT),0.1,63.7
PT08.S2(NMHC),383.0,2214.0
NOx(GT),2.0,1479.0
PT08.S3(NOx),322.0,2683.0
NO2(GT),2.0,333.0
PT08.S4(NO2),657.0,2775.0
PT08.S5(O3),253.0,2523.0


# Create new entries

### Functions

In [11]:
def get_min(name):
    return feature_df.loc[name]["min"]
                         
def get_max(name):
    return feature_df.loc[name]["max"]
                         

In [12]:
int_columns = func.og_int_column_list()

In [13]:
def get_correct(name):
    if name in int_columns:
        value = np.random.randint(get_min(name), get_max(name))
        return value
    else:
        value = np.random.uniform(get_min(name), get_max(name))
        return round(value, 2)

In [14]:
def get_too_small(name):
    if name in int_columns:
        value = np.random.randint(-250, get_min(name)-1)
        return value
    else:
        value = np.random.uniform(-250, get_min(name)-1)
        return round(value, 2)

In [15]:
def get_too_big(name):
    if name in int_columns:
        value = np.random.randint(get_max(name)+1, 5000)
        return value
    else:
        value = np.random.uniform(get_max(name)+1, 5000)
        return round(value, 2)

## Entry with only correct values

In [16]:
corr_dict = {}
for i in feature_df.index.values:
    corr_dict[i] = get_correct(i)

In [17]:
corr_dict["Date"] = "07/02/2005"
corr_dict["Time"] = '00.00.00'

## Entry with some too small values

In [18]:
change = 1
small_dict = {}
for i in feature_df.index.values:
    if change %2 == 0:
        small_dict[i] = get_correct(i)
    else:
        small_dict[i] = get_too_small(i)
    change+=1

In [19]:
small_dict["Date"] = "07/02/2005"
small_dict["Time"] = '01.00.00'

## Entry some too big values

In [20]:
change = 1
big_dict = {}
for i in feature_df.index.values:
    if change % 2 == 0:
        big_dict[i] = get_correct(i)
    else:
        big_dict[i] = get_too_big(i)
    change+=1

In [21]:
big_dict["Date"] = "07/02/2005"
big_dict["Time"] = '02.00.00'

## Entry with mix values

In [22]:
change = 1
mix_dict = {}
for i in feature_df.index.values:
    if change % 3 == 0:              #3,6,9,12
        mix_dict[i] = get_too_big(i)
    elif change % 2 == 0:            #2,4,8,10,(12)
        mix_dict[i] = get_too_small(i)
    else:                            #1,5,7,11,13
        mix_dict[i] = get_correct(i)
    change+=1

In [23]:
mix_dict["Date"] = "07/02/2005"
mix_dict["Time"] = '03.00.00'

## Entry with missing  values

In [24]:
missing_dict = {}

In [25]:
missing_dict["Date"] = "07/02/2005"
missing_dict["Time"] = '04.00.00'

In [26]:
missing_dict["CO(GT)"] = get_correct('CO(GT)')
missing_dict["PT08.S1(CO)"] = get_correct('PT08.S1(CO)')
missing_dict["NO2(GT)"] = get_correct('NO2(GT)')

In [27]:
missing_dict["NOx(GT)"] = np.nan

# more entries

In [28]:
corr_dict2 = {}
for i in feature_df.index.values:
    corr_dict2[i] = get_correct(i)

corr_dict2["Date"] = "07/02/2005"
corr_dict2["Time"] = '05.00.00'

In [29]:
corr_dict3 = {}
for i in feature_df.index.values:
    corr_dict3[i] = get_correct(i)

corr_dict3["Date"] = "07/02/2005"
corr_dict3["Time"] = '06.00.00'

In [30]:
corr_dict4 = {}
for i in feature_df.index.values:
    corr_dict4[i] = get_correct(i)

corr_dict4["Date"] = "07/02/2005"
corr_dict4["Time"] = '07.00.00'

In [31]:
corr_dict5 = {}
for i in feature_df.index.values:
    corr_dict5[i] = get_correct(i)

corr_dict5["Date"] = "07/02/2005"
corr_dict5["Time"] = '08.00.00'

# Save  LiveData in JSON File

In [32]:
#function
def entry_list(*entries):
    list = []
    for e in entries:
        list.append(e)
    return list

### Save some entries without missing values

In [33]:
entries_v1 = entry_list(corr_dict, small_dict, big_dict, mix_dict)

In [34]:
# Save as JSON file
new_path = org.path("00_LiveData.json")

with open(new_path, 'w') as jf:
    json.dump(entries_v1, jf, indent = 2)

### Save only the entry with missing values

In [35]:
# Save as JSON file
new_path = org.path("01_LiveData_missing.json")

with open(new_path, 'w') as jf:
    json.dump(entry_list(missing_dict), jf, indent = 2)

### Save all

In [36]:
entries_all = entry_list(corr_dict, small_dict, big_dict, mix_dict, missing_dict, corr_dict2, corr_dict3, corr_dict4, corr_dict5)

In [37]:
# Save as JSON file
new_path = org.path("02_LiveData.json")

with open(new_path, 'w') as jf:
    json.dump(entries_all, jf, indent = 2)

In [38]:
#Output
print(json.dumps(entries_all, indent = 2))

[
  {
    "CO(GT)": 3.53,
    "PT08.S1(CO)": 1107,
    "NMHC(GT)": 466,
    "C6H6(GT)": 63.68,
    "PT08.S2(NMHC)": 2038,
    "NOx(GT)": 550,
    "PT08.S3(NOx)": 2437,
    "NO2(GT)": 40,
    "PT08.S4(NO2)": 1878,
    "PT08.S5(O3)": 1977,
    "T": 18.89,
    "RH": 46.43,
    "AH": 0.61,
    "Date": "07/02/2005",
    "Time": "00.00.00"
  },
  {
    "CO(GT)": -76.52,
    "PT08.S1(CO)": 1557,
    "NMHC(GT)": -74,
    "C6H6(GT)": 24.77,
    "PT08.S2(NMHC)": -204,
    "NOx(GT)": 1081,
    "PT08.S3(NOx)": -150,
    "NO2(GT)": 283,
    "PT08.S4(NO2)": 28,
    "PT08.S5(O3)": 1874,
    "T": -38.2,
    "RH": 73.57,
    "AH": -76.35,
    "Date": "07/02/2005",
    "Time": "01.00.00"
  },
  {
    "CO(GT)": 2482.49,
    "PT08.S1(CO)": 1781,
    "NMHC(GT)": 2709,
    "C6H6(GT)": 9.06,
    "PT08.S2(NMHC)": 2777,
    "NOx(GT)": 904,
    "PT08.S3(NOx)": 2936,
    "NO2(GT)": 311,
    "PT08.S4(NO2)": 4531,
    "PT08.S5(O3)": 1787,
    "T": 3480.0,
    "RH": 30.83,
    "AH": 3098.98,
    "Date": "07/02/2005