## Import Libraries

In [26]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
import pickle

## Load Data

In [11]:
# Load the raw dataset. The path is built with os.path.join instead of a
# backslash literal: "\d" is an invalid escape sequence (it triggers a warning
# on current Python versions) and a hard-coded backslash separator only
# resolves on Windows.
df = pd.read_csv(os.path.join("data", "data.csv"))

In [22]:
# Preview the first rows of the loaded frame as a quick sanity check.
df.head()

Unnamed: 0,footfall,tempMode,AQ,USS,CS,VOC,RP,IP,Temperature,fail
0,0,7,7,1,6,6,36,3,1,1
1,190,1,3,3,5,1,20,4,1,0
2,31,7,2,2,6,1,24,6,1,0
3,83,4,3,4,5,1,28,6,1,0
4,640,7,5,6,4,0,68,6,1,0


## EDA

In [13]:
# Summarise column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 944 entries, 0 to 943
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   footfall     944 non-null    int64
 1   tempMode     944 non-null    int64
 2   AQ           944 non-null    int64
 3   USS          944 non-null    int64
 4   CS           944 non-null    int64
 5   VOC          944 non-null    int64
 6   RP           944 non-null    int64
 7   IP           944 non-null    int64
 8   Temperature  944 non-null    int64
 9   fail         944 non-null    int64
dtypes: int64(10)
memory usage: 73.9 KB


In [14]:
# Element-wise missing-value mask (True where a cell is NaN). The display is
# truncated, so the per-column fraction computed in the following cell is the
# easier thing to read.
df.isna()

Unnamed: 0,footfall,tempMode,AQ,USS,CS,VOC,RP,IP,Temperature,fail
0,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...
939,False,False,False,False,False,False,False,False,False,False
940,False,False,False,False,False,False,False,False,False,False
941,False,False,False,False,False,False,False,False,False,False
942,False,False,False,False,False,False,False,False,False,False


In [21]:
# Fraction of missing values per column: column-wise null count over row count.
df.isna().sum() / len(df)

footfall       0.0
tempMode       0.0
AQ             0.0
USS            0.0
CS             0.0
VOC            0.0
RP             0.0
IP             0.0
Temperature    0.0
fail           0.0
dtype: float64

In [28]:
# List the column labels available for feature/target selection.
df.columns

Index(['footfall', 'tempMode', 'AQ', 'USS', 'CS', 'VOC', 'RP', 'IP',
       'Temperature', 'fail'],
      dtype='object')

## Split Features and Target

In [35]:
# Column roles used by the rest of the notebook.
# Features that are standardised with the scaler.
numeric_cols = [
    "footfall",
    "USS",
    "CS",
    "VOC",
    "RP",
    "IP",
    "Temperature",
    "AQ",
]
# Features passed through unscaled.
cat_cols = ["tempMode"]
# Label column.
target = "fail"

In [36]:
# Separate the label series from the feature frame; df itself is left untouched.
y = df[target]
X = df.drop(labels=[target], axis="columns")

In [37]:
# Slice the feature frame into the numeric and categorical column groups.
num_data = X.loc[:, numeric_cols]
cat_data = X.loc[:, cat_cols]

In [38]:
# Display the numeric feature subset (pandas truncates the rendered rows).
num_data

Unnamed: 0,footfall,USS,CS,VOC,RP,IP,Temperature,AQ
0,0,1,6,6,36,3,1,7
1,190,3,5,1,20,4,1,3
2,31,2,6,1,24,6,1,2
3,83,4,5,1,28,6,1,3
4,640,6,4,0,68,6,1,5
...,...,...,...,...,...,...,...,...
939,0,1,6,4,73,6,24,7
940,0,2,6,6,50,6,24,5
941,0,2,7,5,43,6,24,6
942,0,2,5,6,46,7,24,6


In [39]:
# Display the categorical feature subset (pandas truncates the rendered rows).
cat_data

Unnamed: 0,tempMode
0,7
1,1
2,7
3,4
4,7
...,...
939,7
940,7
941,3
942,6


## Scale Features and Save Preprocessed Data

In [40]:
# Standardise the numeric features column by column.
# NOTE(review): the scaler is fit on every row of the dataset; if a train/test
# split happens downstream, fit on the training rows only to avoid leakage —
# confirm against the training code.
scaler = StandardScaler()
# fit_transform hands back a plain array by default, so re-attach the original
# row index along with the column names. Without it the scaled frame gets a
# fresh 0..n-1 index and the later column-wise concat with cat_data / y would
# misalign whenever df does not carry a default RangeIndex.
scaled_num_data = pd.DataFrame(
    scaler.fit_transform(num_data),
    columns=numeric_cols,
    index=num_data.index,
)

In [41]:
# Display the standardised numeric features (pandas truncates the rendered rows).
scaled_num_data

Unnamed: 0,footfall,USS,CS,VOC,RP,IP,Temperature,AQ
0,-0.283153,-1.402480,0.477610,1.389813,-0.672788,-0.979504,-2.567407,1.860497
1,-0.107558,0.043660,-0.310613,-0.810763,-1.647540,-0.353894,-2.567407,-0.921775
2,-0.254504,-0.679410,0.477610,-0.810763,-1.403852,0.897327,-2.567407,-1.617343
3,-0.206446,0.766730,-0.310613,-0.810763,-1.160164,0.897327,-2.567407,-0.921775
4,0.308326,2.212869,-1.098836,-1.250878,1.276716,0.897327,-2.567407,0.469361
...,...,...,...,...,...,...,...,...
939,-0.283153,-1.402480,0.477610,0.509582,1.581326,0.897327,1.284147,1.860497
940,-0.283153,-0.679410,0.477610,1.389813,0.180120,0.897327,1.284147,0.469361
941,-0.283153,-0.679410,1.265832,0.949698,-0.246334,0.897327,1.284147,1.164929
942,-0.283153,-0.679410,-0.310613,1.389813,-0.063568,1.522937,1.284147,1.164929


In [None]:
# Persist the fitted scaler (presumably so the same transformation can be
# reapplied outside this notebook). os.path.join replaces the backslash
# literal, which contained the invalid escape "\s" and only resolved to a
# path inside data/ on Windows.
with open(os.path.join("data", "scaler.pkl"), "wb") as f:
    pickle.dump(scaler, f)

In [None]:
# Reassemble the dataset column-wise: scaled numerics, raw categoricals, label.
data_processed = pd.concat(
    [scaled_num_data, cat_data, y],
    axis="columns",
)

In [None]:
# Display the assembled frame to check the column order before it is written out.
data_processed

Unnamed: 0,footfall,USS,CS,VOC,RP,IP,Temperature,AQ,tempMode,fail
0,-0.283153,-1.402480,0.477610,1.389813,-0.672788,-0.979504,-2.567407,1.860497,7,1
1,-0.107558,0.043660,-0.310613,-0.810763,-1.647540,-0.353894,-2.567407,-0.921775,1,0
2,-0.254504,-0.679410,0.477610,-0.810763,-1.403852,0.897327,-2.567407,-1.617343,7,0
3,-0.206446,0.766730,-0.310613,-0.810763,-1.160164,0.897327,-2.567407,-0.921775,4,0
4,0.308326,2.212869,-1.098836,-1.250878,1.276716,0.897327,-2.567407,0.469361,7,0
...,...,...,...,...,...,...,...,...,...,...
939,-0.283153,-1.402480,0.477610,0.509582,1.581326,0.897327,1.284147,1.860497,7,1
940,-0.283153,-0.679410,0.477610,1.389813,0.180120,0.897327,1.284147,0.469361,7,1
941,-0.283153,-0.679410,1.265832,0.949698,-0.246334,0.897327,1.284147,1.164929,3,1
942,-0.283153,-0.679410,-0.310613,1.389813,-0.063568,1.522937,1.284147,1.164929,6,1


In [None]:
# Write the preprocessed dataset without the index column. os.path.join
# replaces the backslash literal, which contained the invalid escape "\p" and
# only resolved to a path inside data/ on Windows.
data_processed.to_csv(os.path.join("data", "preprocessed.csv"), index=False)