In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read kag_feeds.csv from the working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer='kag_feeds.csv')

In [3]:
# Inspect the column labels exactly as they were read from the CSV.
df.columns

Index(['   MQ-3 (Alcohol Sensor)(mg/L)', ' MQ-4 (Methane Sensor)(ppm)',
       ' MQ-7 (CO Sensor)(ppm)', ' MQ-8 (Hydrogen Sensor)(ppm)',
       ' MQ-9 (CO Sensor)(ppm)', ' MQ-135 (Air Quality Sensor)(ppm)',
       ' Timestamp'],
      dtype='object')

In [4]:
# Give the MQ-7 (CO) and MQ-8 (hydrogen) columns short names and keep only those two.
sensor_aliases = {
    ' MQ-7 (CO Sensor)(ppm)': 'field1',
    ' MQ-8 (Hydrogen Sensor)(ppm)': 'field2',
}
df = df.rename(columns=sensor_aliases)[['field1', 'field2']]
df

Unnamed: 0,field1,field2
0,295.90,560.55
1,289.06,554.69
2,290.04,553.71
3,293.95,554.69
4,285.16,547.85
...,...,...
7099,406.28,327.05
7100,279.12,391.13
7101,384.76,309.03
7102,294.37,276.92


In [5]:
# Text label for field1: readings above 220 are "Hazardous", everything else "Non Hazardous".
df['pred1'] = [
    "Hazardous" if reading > 220 else "Non Hazardous"
    for reading in df['field1']
]

In [6]:
# Tally how many rows fall under each pred1 label.
df['pred1'].value_counts()

Non Hazardous    5837
Hazardous        1267
Name: pred1, dtype: int64

In [7]:
# Text label for field2: readings above 198 are "Hazardous", everything else "Non Hazardous".
df['pred2'] = [
    "Hazardous" if reading > 198 else "Non Hazardous"
    for reading in df['field2']
]

In [8]:
# Tally how many rows fall under each pred2 label.
df['pred2'].value_counts()

Non Hazardous    5363
Hazardous        1741
Name: pred2, dtype: int64

In [9]:
# from sklearn.preprocessing import LabelEncoder
# le = LabelEncoder()
# df.pred1 = le.fit_transform(df.pred1)
# df.pred2 = le.fit_transform(df.pred2)

In [10]:
# Boolean hazard flag per sensor; these replace the string labels stored in
# pred1/pred2 by the earlier cells.
# The cut-offs are named so the comparisons no longer rely on bare magic numbers.
FIELD1_HAZARD_THRESHOLD = 220  # applied to field1 (MQ-7 CO reading)
FIELD2_HAZARD_THRESHOLD = 198  # applied to field2 (MQ-8 hydrogen reading)

# Vectorised comparison instead of a per-row lambda. A NaN reading compares as
# False, which is what `True if x > t else False` produced as well.
df['pred1'] = df['field1'] > FIELD1_HAZARD_THRESHOLD
df['pred2'] = df['field2'] > FIELD2_HAZARD_THRESHOLD

In [11]:
# Rows whose field2 reading is strictly below 198.
# A reading of exactly 198 is also flagged False by the `> 198` rule, but it is
# not selected by this strict comparison.
below_field2_cutoff = df['field2'] < 198
df[below_field2_cutoff]

Unnamed: 0,field1,field2,pred1,pred2
1272,211.91,197.27,False,False
1275,211.91,197.27,False,False
1276,211.91,197.27,False,False
1277,210.94,196.29,False,False
1278,210.94,196.29,False,False
...,...,...,...,...
7077,197.27,174.80,False,False
7078,198.24,174.80,False,False
7079,197.27,174.80,False,False
7080,197.27,174.80,False,False


In [12]:
# Check the column dtypes after the flag columns were added.
df.dtypes

field1    float64
field2    float64
pred1        bool
pred2        bool
dtype: object

In [13]:
# Display the frame with the per-sensor boolean flags.
df

Unnamed: 0,field1,field2,pred1,pred2
0,295.90,560.55,True,True
1,289.06,554.69,True,True
2,290.04,553.71,True,True
3,293.95,554.69,True,True
4,285.16,547.85,True,True
...,...,...,...,...
7099,406.28,327.05,True,True
7100,279.12,391.13,True,True
7101,384.76,309.03,True,True
7102,294.37,276.92,True,True


In [14]:
# Combined flag: True only where both per-sensor flags are True.
both_flagged = df.pred1 & df.pred2
df['fpred'] = both_flagged

In [15]:
# Integer (0/1) copy of the combined flag; this column is the training target.
# A dtype cast replaces the per-row `1 if x==True else 0` lambda. int64 is
# spelled out so the result has the same dtype the lambda produced.
df['fpred_val'] = df['fpred'].astype('int64')

In [16]:
# Display the frame including the combined flag and its 0/1 encoding.
df

Unnamed: 0,field1,field2,pred1,pred2,fpred,fpred_val
0,295.90,560.55,True,True,True,1
1,289.06,554.69,True,True,True,1
2,290.04,553.71,True,True,True,1
3,293.95,554.69,True,True,True,1
4,285.16,547.85,True,True,True,1
...,...,...,...,...,...,...
7099,406.28,327.05,True,True,True,1
7100,279.12,391.13,True,True,True,1
7101,384.76,309.03,True,True,True,1
7102,294.37,276.92,True,True,True,1


In [17]:
# Class balance of the combined flag.
df['fpred'].value_counts()

False    5894
True     1210
Name: fpred, dtype: int64

In [18]:
# Repeats the tally from the previous cell.
df['fpred'].value_counts()

False    5894
True     1210
Name: fpred, dtype: int64

In [19]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how='any')

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [21]:
# Hold out 20% of the rows for evaluation.
# random_state pins the shuffle so the split, the reported score and the saved
# artefacts are reproducible across runs. stratify keeps the ratio of the
# imbalanced 0/1 target the same in the train and test splits.
X_train, X_test, y_train, y_test = train_test_split(
    df[['field1', 'field2']],
    df.fpred_val,
    test_size=0.2,
    random_state=42,
    stratify=df.fpred_val,
)

In [22]:
# Element-wise missing-value mask for the training features.
X_train.isna()

Unnamed: 0,field1,field2
6026,False,False
5034,False,False
3432,False,False
2131,False,False
6541,False,False
...,...,...
5185,False,False
5729,False,False
941,False,False
1874,False,False


In [23]:
from sklearn.preprocessing import StandardScaler
# The scaler is fitted on the training split only; the test split reuses that fit.
scaler  = StandardScaler()
# X_train is rebound to the scaled array returned by fit_transform.
# NOTE(review): re-running this cell without re-running the split would fit the
# scaler on already-scaled data — confirm the cells are always run in order.
X_train = scaler.fit_transform(X_train)
# The scaled test features go into the lowercase name x_test; X_test itself stays unscaled.
x_test = scaler.transform(X_test)


In [24]:
# Display the scaled training features.
X_train

array([[-0.4045841 , -0.46765203],
       [-0.4045841 , -0.46765203],
       [-0.33803382, -0.48559721],
       ...,
       [ 0.79309385,  0.85553095],
       [-0.20493325, -0.25304222],
       [-0.44910238, -0.53924967]])

In [25]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier with default settings on the scaled training features.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)

In [26]:
# Mean accuracy on the scaled hold-out features.
model.score(X=x_test, y=y_test)

0.9634060520760028

In [27]:
import pickle

# Serialise the fitted logistic-regression model to disk as a pickle file.
model_pkl_file = "model.pkl"

with open(model_pkl_file, mode='wb') as pkl_out:
    pickle.dump(model, pkl_out)

In [28]:
import joblib

# Persist the fitted scaler to 'scaler_model.joblib'.
joblib.dump(value=scaler, filename='scaler_model.joblib')

['scaler_model.joblib']