In [3]:
import pandas as pd
import numpy as np 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn import metrics

In [4]:

# Load the raw alarm log from the Excel workbook (path is relative to the
# notebook's working directory).
data = pd.read_excel("False Alarm Cases.xlsx")

# Keep only the columns that contain no missing values. `data` itself is left
# untouched so the raw frame stays available for inspection.
df = data.dropna(axis="columns")

# Preview the cleaned frame (a cell displays only its last expression).
df.head()

Unnamed: 0,Case No.,Ambient Temperature( deg C),Calibration(days),Unwanted substance deposition(0/1),Humidity(%),H2S Content(ppm),detected by(% of sensors),Spuriosity Index(0/1)
0,Case # 1,-2,226,1,96,9,21,1
1,Case # 2,4,134,1,83,4,77,0
2,Case # 3,7,163,0,69,2,81,0
3,Case # 4,5,162,0,80,6,69,0
4,Case # 5,2,192,1,87,3,67,0


In [5]:
# Range check on the humidity column. Jupyter echoes only the value of the
# final expression, so just the maximum appears in this cell's output.
df.loc[:, "Humidity(%)"].min()
df.loc[:, "Humidity(%)"].max()


96

In [6]:
# Report both ends of the humidity range across every recorded case.
humidity = df["Humidity(%)"]
print("Min Humidity - ", humidity.min())
print("Max Humidity - ", humidity.max())

Min Humidity -  69
Max Humidity -  96


In [7]:
# Rows whose spuriosity flag is 0 are kept here as the true alarms.
is_true_alarm = df["Spuriosity Index(0/1)"].eq(0)
true_alarm = df[is_true_alarm]

In [8]:
# Show the first five true-alarm rows.
true_alarm.head(n=5)

Unnamed: 0,Case No.,Ambient Temperature( deg C),Calibration(days),Unwanted substance deposition(0/1),Humidity(%),H2S Content(ppm),detected by(% of sensors),Spuriosity Index(0/1)
1,Case # 2,4,134,1,83,4,77,0
2,Case # 3,7,163,0,69,2,81,0
3,Case # 4,5,162,0,80,6,69,0
4,Case # 5,2,192,1,87,3,67,0
6,Case # 7,5,220,0,78,6,87,0


In [9]:
# Humidity range restricted to the true-alarm rows.
true_alarm_humidity = true_alarm["Humidity(%)"]
print("Min Humidity when Alarm was True - ", true_alarm_humidity.min())
print("Max Humidity when Alarm was True - ", true_alarm_humidity.max())

Min Humidity when Alarm was True -  69
Max Humidity when Alarm was True -  96


In [10]:
# Rows whose spuriosity flag is 1 are kept here as the false alarms.
is_false_alarm = df["Spuriosity Index(0/1)"].eq(1)
false_alarm = df[is_false_alarm]

In [11]:
# Show the first five false-alarm rows.
false_alarm.head(n=5)

Unnamed: 0,Case No.,Ambient Temperature( deg C),Calibration(days),Unwanted substance deposition(0/1),Humidity(%),H2S Content(ppm),detected by(% of sensors),Spuriosity Index(0/1)
0,Case # 1,-2,226,1,96,9,21,1
5,Case # 6,-1,221,1,93,7,38,1
9,Case # 10,-1,217,1,96,9,25,1
13,Case # 14,2,213,1,87,7,31,1
14,Case # 15,8,104,1,93,7,44,1


In [12]:
# Calibration-age range restricted to the false-alarm rows.
false_alarm_calibration = false_alarm["Calibration(days)"]
print("Min Calibration Days when alarm was false - ", false_alarm_calibration.min())
print("Max Calibration Days when alarm was false - ", false_alarm_calibration.max())

Min Calibration Days when alarm was false -  10
Max Calibration Days when alarm was false -  250


In [13]:
# Calibration-age range restricted to the true-alarm rows.
true_alarm_calibration = true_alarm["Calibration(days)"]
print("Min Calibration Days when alarm was True - ", true_alarm_calibration.min())
print("Max Calibration Days when alarm was True - ", true_alarm_calibration.max())

Min Calibration Days when alarm was True -  10
Max Calibration Days when alarm was True -  250


In [14]:
# Split the frame into a feature matrix and a label column by position.
# NOTE: the variable names are the reverse of the usual convention --
# `depnd_var` holds the six predictor columns (positions 1-6) and
# `indepnd_var` holds the label taken from position 7.
depnd_var = df.iloc[:, 1:7].values
# Shape the label as a single-column 2-D array.
indepnd_var = df.iloc[:, 7].values.reshape(-1, 1)

In [15]:
# Echo the feature matrix (one row per case, six columns).
depnd_var

array([[ -2, 226,   1,  96,   9,  21],
       [  4, 134,   1,  83,   4,  77],
       [  7, 163,   0,  69,   2,  81],
       ...,
       [  1,  32,   0,  95,   4, 100],
       [  6,  31,   0,  93,   6,  39],
       [ -1, 106,   1,  74,   9,  28]])

In [16]:
# Echo the label column, stored as an (n, 1) array.
indepnd_var

array([[1],
       [0],
       [0],
       ...,
       [0],
       [1],
       [1]])

In [17]:
# Hold out 20% of the rows for testing (an 80/20 train/test split); the fixed
# random_state keeps the shuffle reproducible between runs.
train_x, test_x, train_y, test_y = train_test_split(
    depnd_var,
    indepnd_var,
    test_size=0.20,
    random_state=0,
)

In [18]:
# Peek at the missing-value mask for the first five rows before scaling.
# Columns containing NaN were already removed by dropna(axis=1) when `df` was built.
df.isna().head()

Unnamed: 0,Case No.,Ambient Temperature( deg C),Calibration(days),Unwanted substance deposition(0/1),Humidity(%),H2S Content(ppm),detected by(% of sensors),Spuriosity Index(0/1)
0,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False


In [19]:
# Learn the per-feature scaling statistics from the training rows only, then
# apply that same scaling to both splits so no test statistics leak into it.
scaler = StandardScaler()
scaler.fit(train_x)
train_x = scaler.transform(train_x)
test_x = scaler.transform(test_x)
# NOTE: train_x/test_x are overwritten, so re-running this cell on its own
# would scale already-scaled data.
test_x



array([[-1.05692169, -1.70203329,  1.02340671,  1.09856064,  1.52095911,
        -0.33985281],
       [ 1.38262448, -0.7138952 , -0.97712863, -0.07682242, -1.12107434,
        -1.49254616],
       [-1.36186496,  1.262381  ,  1.02340671,  1.35975687,  0.64028129,
         0.04437831],
       ...,
       [-1.05692169, -0.66965021,  1.02340671, -0.07682242,  1.0806202 ,
         1.34115834],
       [-0.75197842, -0.66965021, -0.97712863, -1.25220548,  0.64028129,
        -0.33985281],
       [-0.44703515,  0.48071952,  1.02340671, -0.33801866, -0.24039652,
         1.00495611]])

In [20]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes classifier trained on the scaled training features.
classifier = GaussianNB()
# train_y is an (n, 1) column; flatten it to the 1-D shape fit() expects so
# scikit-learn does not emit a DataConversionWarning.
classifier.fit(train_x, train_y.ravel())

  y = column_or_1d(y, warn=True)


GaussianNB(priors=None, var_smoothing=1e-09)

In [21]:
# Predict a class label (0 or 1) for every row of the scaled test set.
y_pred = classifier.predict(test_x)
# Echo the predictions.
y_pred

array([0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
       1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,

In [22]:
# Tabulate true labels against predicted labels for the held-out rows.
cm = confusion_matrix(y_true=test_y, y_pred=y_pred)
cm

array([[302,   1],
       [  0,  76]])

In [23]:
# Fraction of held-out rows whose predicted label matches the true label.
accuracy = metrics.accuracy_score(test_y, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9973614775725593
