In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler

In [11]:
# Build a synthetic air-quality data set of N_SAMPLES rows.
# The seed is fixed so every run of this cell produces the same frame.
N_SAMPLES = 10000
np.random.seed(0)

# CO concentration: uniform random numbers in [0, 0.1)
x = np.random.rand(N_SAMPLES) * 0.1
df = pd.DataFrame(x, columns=["CO(mg/m^3)"])
# People: random integers in [0, 30) (upper bound exclusive), drawn as a flat
# 1-D vector so the column is assigned from a plain array
df["People"] = np.random.randint(0, 30, size=N_SAMPLES)
# moving average of People over a 6-row window; the first 5 rows have an
# incomplete window and are NaN, which propagates to the derived columns below
df["People_MA"] = df["People"].rolling(window=6).mean()
# per-row random factor, uniform in [0.02, 0.38), divided by 60
# NOTE(review): the original comment calls this "(m^3)CO person per hour"; the
# /60 and the 1144 multiplier are presumably unit conversions - confirm
co_rate = (np.random.rand(N_SAMPLES) * 0.36 + 0.02) / 60
df["CO produced(mg)"] = df["People_MA"] * co_rate * 1144
# pump level: uniform random numbers in [0, 1)
df["Suction Pump"] = np.random.rand(N_SAMPLES)
# the pump removes (Suction Pump * 10%) of the produced CO; written as a direct
# product instead of `produced - produced * (1 - pump * 0.1)` to avoid
# subtracting two nearly equal numbers
df["CO removed(mg)"] = df["CO produced(mg)"] * df["Suction Pump"] * 0.1

df.head(20)

Unnamed: 0,CO(mg/m^3),People,People_MA,CO produced(mg),Suction Pump,CO removed(mg)
0,0.054881,28,,,0.828481,
1,0.071519,8,,,0.362649,
2,0.060276,27,,,0.800096,
3,0.054488,28,,,0.08438,
4,0.042365,1,,,0.875983,
5,0.064589,2,15.666667,100.552696,0.47514,4.777664
6,0.043759,29,15.833333,69.023773,0.993702,6.858909
7,0.089177,15,17.0,48.889604,0.30323,1.48248
8,0.096366,10,14.166667,13.323008,0.63834,0.850461
9,0.038344,0,9.5,67.711902,0.713824,4.83344


In [None]:
# Replace Suction Pump values that are exactly 0 with random numbers in [0, 1).
# Vectorised: one uniform draw per zero entry, in row order; every other row
# keeps its value.
pump = df["Suction Pump"].to_numpy(dtype=float, copy=True)
is_zero = pump == 0
pump[is_zero] = np.random.uniform(0, 1, size=int(is_zero.sum()))
df["Suction Pump"] = pump
# Decrease the CO(mg/m^3) values by a percentage determined by the Suction Pump
# value (factor 1 - pump * 0.1, i.e. a 10% reduction when the pump is at 1).
# NOTE: this rescales the column in place, so running the cell twice applies
# the reduction twice - rebuild df before re-running.
df["CO(mg/m^3)"] = df["CO(mg/m^3)"] * (1 - df["Suction Pump"] * 0.1)

In [None]:
# Column names kept as an explicit list. Written as adjacent string literals
# (separated only by whitespace) Python would join them into the single string
# 'CO(mg/m^3)PeopleSuction Pump'.
SELECTED_COLUMNS = ["CO(mg/m^3)", "People", "Suction Pump"]
SELECTED_COLUMNS

In [275]:
# Line chart (with markers) of a 96-row window of df_normalized_2, starting at
# row position x. No x/y arguments are passed, so axis selection is left to
# plotly's handling of the frame.
x = 0
window = df_normalized_2.iloc[x : x + 96]
fig = px.line(
    window,
    markers=True,
    title="Normalized 2 Air Quality Data",
)
fig.show()

In [279]:
# Build N_NEUTRAL_ROWS "neutral" rows: People and Suction Pump are 0 and
# CO(mg/m^3) is a 0.05 baseline plus Gaussian noise (mean 0, std 0.05).
N_NEUTRAL_ROWS = 1000
BASELINE_CO = 0.05
NOISE_STD = 0.05

noisy_co = BASELINE_CO + np.random.normal(0, NOISE_STD, N_NEUTRAL_ROWS)
df_neutral = pd.DataFrame(
    {
        # negative concentrations are clipped to 0
        "CO(mg/m^3)": np.clip(noisy_co, 0, None),
        "People": np.zeros(N_NEUTRAL_ROWS),
        "Suction Pump": np.zeros(N_NEUTRAL_ROWS),
    }
)
df_neutral

Unnamed: 0,CO(mg/m^3),People,Suction Pump
0,0.075034,0.0,0.0
1,0.003099,0.0,0.0
2,0.022322,0.0,0.0
3,0.066063,0.0,0.0
4,0.074102,0.0,0.0
...,...,...,...
995,0.028895,0.0,0.0
996,0.011514,0.0,0.0
997,0.081240,0.0,0.0
998,0.048168,0.0,0.0


In [281]:
# Stack the neutral rows on top of the existing data and renumber the index.
# NOTE: df is rebound to the result, so each re-run of this cell prepends
# df_neutral once more.
frames = [df_neutral, df]
df = pd.concat(frames, ignore_index=True)
df

Unnamed: 0,CO(mg/m^3),People,Suction Pump
0,0.075034,0.0,0.000000
1,0.003099,0.0,0.000000
2,0.022322,0.0,0.000000
3,0.066063,0.0,0.000000
4,0.074102,0.0,0.000000
...,...,...,...
10328,3.023261,6.0,0.247546
10329,2.323334,4.0,0.319441
10330,2.313576,2.0,0.360099
10331,1.992810,3.0,0.510426


In [282]:
# Summary statistics (count, mean, std, min, quartiles, max) for the columns of df.
df.describe()

Unnamed: 0,CO(mg/m^3),People,Suction Pump
count,10333.0,10333.0,10333.0
mean,1.840099,3.424562,0.458219
std,1.412174,4.370125,0.340231
min,0.0,0.0,0.0
25%,0.780497,0.0,0.158373
50%,1.576571,0.0,0.441858
75%,2.646801,7.0,0.792993
max,11.074714,13.0,1.0


In [283]:
# Persist the final data set. The target directory is created first so the
# write also succeeds on a fresh checkout where ../data does not exist yet.
output_path = Path("../data/data.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
# The row index is still written as the first (unnamed) column, so the file
# layout is the same as before.
df.to_csv(output_path)