# Purpose of the Notebook:

In this notebook, a regressor is used to make predictions from the previously simulated data.
We decided to use linear regression.

# Unit Test

## Imports

In [1]:
# import
import pandas as pd
import pickle
import json
import orga_functions as org

import X3_Forecasting as forecast
import X4_ReadJSON as readj

In [2]:
# Load the pickled regressor "LinearRegression_ah_regressor.pkl".
# NOTE: pickle.load can execute arbitrary code while unpickling, so this file
# must come from a trusted source.
# NOTE(review): despite the `_clf` suffix, the file name indicates that this
# object is a regressor, not a classifier.
with open(org.path("LinearRegression_ah_regressor.pkl"), 'rb') as f:
    linr_clf = pickle.load(f)

## JSON

### Test 1 - step by step

1. step: Output of the JSON File in the original form

In [3]:
# Read the JSON file into a list of dictionaries.
# The context manager guarantees that the file handle is closed even if
# json.load raises (e.g. on malformed JSON); a bare open()/close() pair would leak it.
with open(org.path("01_LiveData.json")) as f:
    json_entry = json.load(f)

In [4]:
# Display the parsed JSON payload: a list holding one dict per record.
json_entry

[{'CO(GT)': 3.3,
  'PT08.S1(CO)': 1042,
  'NMHC(GT)': 377,
  'C6H6(GT)': 16.59,
  'PT08.S2(NMHC)': 1436,
  'NOx(GT)': 187,
  'PT08.S3(NOx)': 1232,
  'NO2(GT)': 184,
  'PT08.S4(NO2)': 1290,
  'PT08.S5(O3)': 525,
  'T': 1.2,
  'RH': 87.77,
  'AH': 0.47,
  'Date': '07/02/2005',
  'Time': '00.00.00'}]

2. step: Output of the JSON File as a DataFrame

In [5]:
# Read the JSON file and convert it into a DataFrame.
# In the displayed result the Date and Time columns come first, followed by
# the measurement columns under their original names.
json_single_og = readj.json_to_df("01_LiveData.json")
json_single_og

Unnamed: 0,Date,Time,CO(GT),PT08.S1(CO),NMHC(GT),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH,AH
0,07/02/2005,00.00.00,3.3,1042,377,16.59,1436,187,1232,184,1290,525,1.2,87.77,0.47


3. step: Output of the JSON File as a prepared DataFrame

In [6]:
# DataFrame with correct datetime and columns:
# in the displayed result, Date/Time are replaced by a single `date` column and
# the measurement columns carry snake_case names (e.g. 'CO(GT)' -> co_gt).
readj.json_to_prepared_df("01_LiveData.json")

Unnamed: 0,date,co_gt,pt08_s1_co,nmhc_gt,c6h6_gt,pt08_s2_nmhc,nox_gt,pt08_s3_nox,no2_gt,pt08_s4_no2,pt08_s5_o3,t,rh,ah
0,2005-02-07,3.3,1042,377,16.59,1436,187,1232,184,1290,525,1.2,87.77,0.47


4. step: Output of the JSON File as a prepared DataFrame with missing value treatment

In [7]:
# Read the JSON as a DataFrame and apply the missing-value treatment, using
# "02_AlleFeatureList.csv" as the feature codebook.
# For this record the displayed result is identical to the prepared DataFrame
# of step 3, i.e. no value was changed.
# NOTE(review): how the codebook defines a missing/invalid value is not visible
# here - confirm in X4_ReadJSON.
readj.json_to_correct_missing_df("01_LiveData.json", "02_AlleFeatureList.csv")

Unnamed: 0,date,co_gt,pt08_s1_co,nmhc_gt,c6h6_gt,pt08_s2_nmhc,nox_gt,pt08_s3_nox,no2_gt,pt08_s4_no2,pt08_s5_o3,t,rh,ah
0,2005-02-07,3.3,1042,377,16.59,1436,187,1232,184,1290,525,1.2,87.77,0.47


5. step: Output of the relevant features of the JSON file as a DataFrame

In [8]:
# DataFrame restricted to the features relevant for the given regressor.
# In the displayed result it is indexed by `date` and keeps 7 columns:
# pt08_s1_co, c6h6_gt, pt08_s2_nmhc, no2_gt, pt08_s4_no2, t, ah.
# NOTE(review): presumably the column selection is derived from the fitted
# model - confirm in X4_ReadJSON.
js = readj.json_to_ml_features_df("01_LiveData.json", linr_clf)
js

Unnamed: 0_level_0,pt08_s1_co,c6h6_gt,pt08_s2_nmhc,no2_gt,pt08_s4_no2,t,ah
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2005-02-07,1042,16.59,1436,184,1290,1.2,0.47


6. step: Output of the Prediction-DataFrame based on the JSON file and the given regressor

In [9]:
# The prediction: the displayed result is a DataFrame indexed by
# `future_datetime` with a single `predicted_ah` column.
# The record stamped 00:00 yields a prediction for 06:00 of the same day.
# NOTE(review): presumably a fixed 6-hour forecast horizon - confirm in X4_ReadJSON.
readj.pred_json_df("01_LiveData.json", linr_clf)

Unnamed: 0_level_0,predicted_ah
future_datetime,Unnamed: 1_level_1
2005-02-07 06:00:00,0.475788


### Test 2

In [10]:
# JSON file in its original form.
# The context manager guarantees that the file handle is closed even if
# json.load raises (e.g. on malformed JSON); a bare open()/close() pair would leak it.
with open(org.path("02_LiveData.json")) as f:
    json_entry = json.load(f)

json_entry

[{'CO(GT)': -89.22,
  'PT08.S1(CO)': 1264,
  'NMHC(GT)': -220,
  'C6H6(GT)': 25.87,
  'PT08.S2(NMHC)': -99,
  'NOx(GT)': 176,
  'PT08.S3(NOx)': 117,
  'NO2(GT)': 132,
  'PT08.S4(NO2)': 315,
  'PT08.S5(O3)': 1676,
  'T': -224.44,
  'RH': 77.01,
  'AH': -192.41,
  'Date': '07/02/2005',
  'Time': '01.00.00'}]

The entire data preparation and prediction run in a single step:

In [11]:
# Output of the prediction DataFrame based on the JSON file and the given regressor.
# NOTE(review): the input record contains negative readings
# (e.g. T = -224.44, AH = -192.41); presumably this case exercises the
# missing/invalid-value treatment before predicting - confirm.
readj.pred_json_df("02_LiveData.json", linr_clf)

Unnamed: 0_level_0,predicted_ah
future_datetime,Unnamed: 1_level_1
2005-02-07 07:00:00,1.118451


### Test 3

In [12]:
# JSON file in its original form.
# The context manager guarantees that the file handle is closed even if
# json.load raises (e.g. on malformed JSON); a bare open()/close() pair would leak it.
with open(org.path("03_LiveData.json")) as f:
    json_entry = json.load(f)

json_entry

[{'CO(GT)': 43.95,
  'PT08.S1(CO)': 1240,
  'NMHC(GT)': 3014,
  'C6H6(GT)': 19.59,
  'PT08.S2(NMHC)': 4305,
  'NOx(GT)': 412,
  'PT08.S3(NOx)': 2001,
  'NO2(GT)': 15,
  'PT08.S4(NO2)': 3555,
  'PT08.S5(O3)': 1775,
  'T': 3254.33,
  'RH': 25.45,
  'AH': 1415.77,
  'Date': '07/02/2005',
  'Time': '02.00.00'}]

In [13]:
# Output of the prediction DataFrame based on the JSON file and the given regressor.
# NOTE(review): the input record contains very large readings
# (e.g. T = 3254.33, AH = 1415.77); presumably this case exercises the
# outlier/invalid-value treatment before predicting - confirm.
readj.pred_json_df("03_LiveData.json", linr_clf)

Unnamed: 0_level_0,predicted_ah
future_datetime,Unnamed: 1_level_1
2005-02-07 08:00:00,1.09873
