In [74]:
import pandas as pd
import numpy as np
import tensorflow as tf
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import pickle

In [75]:
df = pd.read_csv('data/data.csv', index_col=0)
df

Unnamed: 0,CO(mg/m^3)_initial,Volume(m^3),N_people_MA_prev_10min,Ambient-Air-Pump(L/min),Ambient-Air-Pump_power(%),Ambient-Air-Pump_number,CO(mg/m^3)_final
0,5.655523,23.791121,7.895086,676.278801,57.561797,8,5.556082
1,7.370026,29.490340,0.000000,364.445631,72.454714,5,7.176442
2,6.211477,26.161183,9.270920,165.086766,46.293581,8,6.754297
3,5.615021,13.616384,0.000000,300.197701,0.000000,7,5.546475
4,4.365763,27.108231,0.000000,467.855438,0.000000,3,4.397795
...,...,...,...,...,...,...,...
199995,0.023162,19.710060,0.000000,236.547986,0.000000,4,0.041481
199996,0.026660,22.955374,0.000000,392.463541,0.000000,8,0.084421
199997,0.085897,11.771147,0.000000,518.231715,0.000000,1,0.001508
199998,0.038675,11.564071,0.000000,530.232776,0.000000,4,0.080127


In [76]:
df_X = df[df.columns[:-1]]
df_X

Unnamed: 0,CO(mg/m^3)_initial,Volume(m^3),N_people_MA_prev_10min,Ambient-Air-Pump(L/min),Ambient-Air-Pump_power(%),Ambient-Air-Pump_number
0,5.655523,23.791121,7.895086,676.278801,57.561797,8
1,7.370026,29.490340,0.000000,364.445631,72.454714,5
2,6.211477,26.161183,9.270920,165.086766,46.293581,8
3,5.615021,13.616384,0.000000,300.197701,0.000000,7
4,4.365763,27.108231,0.000000,467.855438,0.000000,3
...,...,...,...,...,...,...
199995,0.023162,19.710060,0.000000,236.547986,0.000000,4
199996,0.026660,22.955374,0.000000,392.463541,0.000000,8
199997,0.085897,11.771147,0.000000,518.231715,0.000000,1
199998,0.038675,11.564071,0.000000,530.232776,0.000000,4


In [77]:
df_y = df[df.columns[-1:]]
df_y

Unnamed: 0,CO(mg/m^3)_final
0,5.556082
1,7.176442
2,6.754297
3,5.546475
4,4.397795
...,...
199995,0.041481
199996,0.084421
199997,0.001508
199998,0.080127


In [78]:
X_train, X_test, y_train, y_test = train_test_split(df_X, df_y, test_size=0.2, shuffle=True, random_state=42)

In [79]:
# try linear regression with sklearn
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
reg = LinearRegression().fit(X_train, y_train)
pred = reg.predict(X_test)
true = y_test.values
mean_absolute_error(true, pred)
# save model as pickle
with open('linear_regressor.pkl', 'wb') as f:
    pickle.dump(reg, f)

In [80]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [81]:
# create model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(100, input_dim=X_train.shape[1], activation='tanh'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(100, activation='tanh'),
    tf.keras.layers.Dropout(0.05),
    tf.keras.layers.Dense(100, activation='tanh'),
    tf.keras.layers.Dropout(0.025),
    tf.keras.layers.Dense(100, activation='tanh'),
    tf.keras.layers.Dense(y_train.shape[1], activation='relu')
])

model.compile(optimizer="Adam", loss='mse', metrics="mae")

In [82]:
# train model
history = model.fit(X_train, y_train, epochs=250, batch_size=1024)

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 49/250
Epoch 50/250
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78

In [83]:
# plot history with plotly
fig = px.line(
    history.history,
    labels={'index': 'epoch', 'value': 'loss'},
    title='Loss over epochs'
)
fig.show()

In [84]:
# evaluate model
model.evaluate(X_test, y_test)

# make predictions
y_pred = model.predict(X_test)
y_pred = pd.DataFrame(y_pred, columns=df_y.columns)
y_pred.index = y_test.index
y_pred



Unnamed: 0,CO(mg/m^3)_final
372605,4.473078
551204,1.076492
240320,8.837404
47361,0.018809
555362,7.236803
...,...
458271,7.071097
124744,0.022030
720454,1.678560
256458,3.658744


In [85]:
# add prefix to column names
y_pred.columns = 'pred_' + y_pred.columns
y_test.columns = 'true_' + y_test.columns
# compare predictions with actual values
y_compare = pd.concat([y_test, y_pred], axis=1)
y_compare

Unnamed: 0,true_CO(mg/m^3)_final,pred_CO(mg/m^3)_final
372605,4.554405,4.473078
551204,1.042525,1.076492
240320,8.797529,8.837404
47361,0.037184,0.018809
555362,7.604375,7.236803
...,...,...
458271,5.855125,7.071097
124744,0.034554,0.022030
720454,1.771368,1.678560
256458,2.711156,3.658744


In [86]:
# plot predictions
fig = px.line(y_compare.iloc[:100], x= [i for i in range(0,100)], y = ["true_CO(mg/m^3)_final", "pred_CO(mg/m^3)_final"], title='Predictions vs true values')
fig.show()

In [87]:
# fit the model on the test set and save it as pkl
model.fit(X_test, y_test, epochs=500, batch_size=1024)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.src.callbacks.History at 0x7fb50ce35e10>

In [88]:
model.save("predict_model.keras")

In [89]:
# save the scaler
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)