In [110]:
import numpy as np
import pandas as pd 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score , confusion_matrix , roc_auc_score , roc_curve
from pandas_profiling import ProfileReport
from sklearn.svm import SVC

In [67]:
df =pd.read_excel("D:\Internship\datasets\Acoustic_Extinguisher_Fire_Dataset.xlsx",sheet_name='A_E_Fire_Dataset')
df.head()

Unnamed: 0,SIZE,FUEL,DISTANCE,DESIBEL,AIRFLOW,FREQUENCY,STATUS
0,1,gasoline,10,96,0.0,75,0
1,1,gasoline,10,96,0.0,72,1
2,1,gasoline,10,96,2.6,70,1
3,1,gasoline,10,96,3.2,68,1
4,1,gasoline,10,109,4.5,67,1


In [68]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17442 entries, 0 to 17441
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   SIZE       17442 non-null  int64  
 1   FUEL       17442 non-null  object 
 2   DISTANCE   17442 non-null  int64  
 3   DESIBEL    17442 non-null  int64  
 4   AIRFLOW    17442 non-null  float64
 5   FREQUENCY  17442 non-null  int64  
 6   STATUS     17442 non-null  int64  
dtypes: float64(1), int64(5), object(1)
memory usage: 954.0+ KB


In [69]:
df.isna().sum()

SIZE         0
FUEL         0
DISTANCE     0
DESIBEL      0
AIRFLOW      0
FREQUENCY    0
STATUS       0
dtype: int64

In [70]:
df.shape

(17442, 7)

In [71]:
df.columns

Index(['SIZE', 'FUEL', 'DISTANCE', 'DESIBEL', 'AIRFLOW', 'FREQUENCY',
       'STATUS'],
      dtype='object')

In [72]:
df['FUEL'].unique()

array(['gasoline', 'thinner', 'kerosene', 'lpg'], dtype=object)

In [73]:
df['FUEL'].value_counts()

gasoline    5130
thinner     5130
kerosene    5130
lpg         2052
Name: FUEL, dtype: int64

In [74]:
df['FUEL']

0        gasoline
1        gasoline
2        gasoline
3        gasoline
4        gasoline
           ...   
17437         lpg
17438         lpg
17439         lpg
17440         lpg
17441         lpg
Name: FUEL, Length: 17442, dtype: object

In [75]:
df =pd.get_dummies(df)
df

Unnamed: 0,SIZE,DISTANCE,DESIBEL,AIRFLOW,FREQUENCY,STATUS,FUEL_gasoline,FUEL_kerosene,FUEL_lpg,FUEL_thinner
0,1,10,96,0.0,75,0,1,0,0,0
1,1,10,96,0.0,72,1,1,0,0,0
2,1,10,96,2.6,70,1,1,0,0,0
3,1,10,96,3.2,68,1,1,0,0,0
4,1,10,109,4.5,67,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...
17437,7,190,86,2.2,5,0,0,0,1,0
17438,7,190,84,2.0,4,0,0,0,1,0
17439,7,190,80,1.5,3,0,0,0,1,0
17440,7,190,76,0.4,2,0,0,0,1,0


In [76]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17442 entries, 0 to 17441
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   SIZE           17442 non-null  int64  
 1   DISTANCE       17442 non-null  int64  
 2   DESIBEL        17442 non-null  int64  
 3   AIRFLOW        17442 non-null  float64
 4   FREQUENCY      17442 non-null  int64  
 5   STATUS         17442 non-null  int64  
 6   FUEL_gasoline  17442 non-null  uint8  
 7   FUEL_kerosene  17442 non-null  uint8  
 8   FUEL_lpg       17442 non-null  uint8  
 9   FUEL_thinner   17442 non-null  uint8  
dtypes: float64(1), int64(5), uint8(4)
memory usage: 885.9 KB


In [77]:
#Dependent and Independant Variable
X = df.drop(columns='STATUS')
y = df['STATUS']

In [78]:
X

Unnamed: 0,SIZE,DISTANCE,DESIBEL,AIRFLOW,FREQUENCY,FUEL_gasoline,FUEL_kerosene,FUEL_lpg,FUEL_thinner
0,1,10,96,0.0,75,1,0,0,0
1,1,10,96,0.0,72,1,0,0,0
2,1,10,96,2.6,70,1,0,0,0
3,1,10,96,3.2,68,1,0,0,0
4,1,10,109,4.5,67,1,0,0,0
...,...,...,...,...,...,...,...,...,...
17437,7,190,86,2.2,5,0,0,1,0
17438,7,190,84,2.0,4,0,0,1,0
17439,7,190,80,1.5,3,0,0,1,0
17440,7,190,76,0.4,2,0,0,1,0


In [79]:
y

0        0
1        1
2        1
3        1
4        1
        ..
17437    0
17438    0
17439    0
17440    0
17441    0
Name: STATUS, Length: 17442, dtype: int64

In [80]:
#Train-Test-Split 
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size= 0.2, random_state= 42)

In [81]:
x_train

Unnamed: 0,SIZE,DISTANCE,DESIBEL,AIRFLOW,FREQUENCY,FUEL_gasoline,FUEL_kerosene,FUEL_lpg,FUEL_thinner
9942,5,140,96,1.7,65,0,0,0,1
9624,5,80,106,10.5,48,0,0,0,1
8589,4,80,102,0.0,68,0,0,0,1
16957,7,110,96,0.0,72,0,0,1,0
4807,5,140,109,0.0,72,1,0,0,0
...,...,...,...,...,...,...,...,...,...
11284,1,190,76,0.4,2,0,1,0,0
11964,2,130,96,4.8,24,0,1,0,0
5390,1,50,90,10.3,10,0,0,0,1
860,1,160,86,3.3,4,1,0,0,0


In [82]:
y_train

9942     0
9624     0
8589     0
16957    0
4807     0
        ..
11284    0
11964    0
5390     1
860      0
15795    1
Name: STATUS, Length: 13953, dtype: int64

# Decision Tree Implementation

In [83]:
#Decison Tree Model
dt_model = DecisionTreeClassifier()

In [84]:
dt_model.fit(x_train, y_train)

DecisionTreeClassifier()

In [85]:
dt_model.score(x_test,y_test)

0.9661794210375466

In [86]:
grid_pram = {"criterion":['gini','entropy'],
             "splitter":['best','random'],
             "max_depth" : range(2,40,1),
             "min_samples_split":range(2,10 ,1),
             "min_samples_leaf":range(1,10,1),
             'ccp_alpha':np.random.rand(20)
             }

In [88]:
grid_dt = GridSearchCV(estimator=dt_model,param_grid=grid_pram , cv = 10 , n_jobs=-1)

In [None]:
grid_dt.fit(x_train,y_train)

In [None]:
grid_dt.best_params_

In [89]:
grid_dt_new = DecisionTreeClassifier(criterion='entropy' , max_depth=17 ,min_samples_leaf=1 , min_samples_split=3 , splitter='random',ccp_alpha=0.014)

In [92]:
grid_dt_new.fit(x_train, y_train)

DecisionTreeClassifier(ccp_alpha=0.014, criterion='entropy', max_depth=17,
                       min_samples_split=3, splitter='random')

In [93]:
grid_dt_new.score(x_test,y_test)

0.8526798509601605

In [94]:
y_pred = grid_dt_new.predict(x_train)

In [95]:
confusion_matrix(y_train,y_pred)

array([[5814, 1231],
       [ 793, 6115]], dtype=int64)

# Random Forest Implementation

In [96]:
rf = RandomForestClassifier(n_estimators=5)

In [97]:
rf.fit(x_train, y_train)

RandomForestClassifier(n_estimators=5)

In [98]:
rf.score(x_test,y_test)

0.9624534250501576

In [99]:
rf.estimators_

[DecisionTreeClassifier(max_features='auto', random_state=405495377),
 DecisionTreeClassifier(max_features='auto', random_state=921418842),
 DecisionTreeClassifier(max_features='auto', random_state=510016337),
 DecisionTreeClassifier(max_features='auto', random_state=1368595273),
 DecisionTreeClassifier(max_features='auto', random_state=691545827)]

In [105]:
grid_pram = {
    "n_estimators" : [5,10 , 50 , 100 , 120 , 150],
    'criterion' :['gini' ,'entropy'],
    'max_depth' :range(10),
    'min_samples_leaf' :range(10)
    
}

In [106]:
grid_serach_rf = GridSearchCV(param_grid= grid_pram, cv = 10 , n_jobs=6,verbose=1 ,estimator = rf)

In [107]:
grid_serach_rf.fit(x_train,y_train)

Fitting 10 folds for each of 1200 candidates, totalling 12000 fits


KeyboardInterrupt: 

In [None]:
grid_serach_rf.best_params_

In [102]:
rf_new = RandomForestClassifier(criterion='entropy' , max_depth=9 , min_samples_leaf= 1 , n_estimators=120)

In [103]:
rf_new.fit(x_train,y_train)

RandomForestClassifier(criterion='entropy', max_depth=9, n_estimators=120)

In [104]:
rf_new.score(x_test,y_test)

0.9593006592146747

# SVM Implementation

In [111]:
svc = SVC()

In [112]:
svc.fit(x_train, y_train)

SVC()

In [113]:
svc.score(x_test, y_test)

0.8950988822012038

In [None]:
param ={"kernel":['linear', 'poly', 'rbf', 'sigmoid' ],
        'C':[.1,.4 , .6 , 1,2,3,100,200,500],
        'gamma':[.001,.1,.4,.004,.003]
    } 
svm_grid = GridSearchCV(svc , param_grid=param , verbose=3 )
svm_grid.fit(x_train,y_train)

In [None]:
svm_grid.best_params_

In [114]:
svc1 = SVC(kernel='poly')
svc1.fit(x_train,y_train)

SVC(kernel='poly')

In [115]:
svc1.score(x_test,y_test)

0.8896531957580969