In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import warnings
# Silences every warning for the rest of the kernel session (no category or
# module filter is given), which also hides potentially useful notices.
warnings.filterwarnings('ignore')

In [2]:
# Load the dataset from a path relative to the notebook's working directory
# (presumably the outlier-cleaned export, judging by the file name).
# NOTE(review): the file is read without `index_col`, so the saved row index
# comes back as an 'Unnamed: 0' column and is later kept as a feature in `x`.
data = pd.read_csv('Datasets/Data_RemoveOutliers')

In [3]:
# Preview the first five rows to check which columns were loaded.
data.head()
# data.shape

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,FWI,Classes
0,29,57,18,0.0,65.7,3.4,7.6,1.3,0.5,0
1,29,61,13,1.25,64.4,4.1,7.6,1.0,0.4,0
2,26,82,22,1.25,47.7375,2.5,7.1,0.3,0.1,0
3,25,89,13,1.25,47.7375,1.3,6.9,0.0,0.0,0
4,27,77,16,0.0,64.8,3.0,14.2,1.2,0.5,0


#### Taking x and y values:

In [4]:
# Feature matrix: every loaded column except 'FWI' and 'Classes'.
# NOTE(review): 'Unnamed: 0' (the saved row index) is therefore kept as a feature.
x = data.drop(columns=['FWI', 'Classes'])

In [5]:
# Preview the first five rows of the feature matrix.
x.head()

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI
0,29,57,18,0.0,65.7,3.4,7.6,1.3
1,29,61,13,1.25,64.4,4.1,7.6,1.0
2,26,82,22,1.25,47.7375,2.5,7.1,0.3
3,25,89,13,1.25,47.7375,1.3,6.9,0.0
4,27,77,16,0.0,64.8,3.0,14.2,1.2


In [6]:
# Target vector: the 'Classes' column of the loaded frame.
y = data.loc[:, 'Classes']

#### Train-Test split():

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=30,
)

In [8]:
# Check the size of the training partition as (rows, columns).
x_train.shape

(183, 8)

#### Feature Scaling:

In [9]:
from sklearn.preprocessing import StandardScaler
# Standardising transformer with default settings; it is fitted in a later cell.
scaler = StandardScaler()

In [10]:
# Fit the scaler on the training rows only and replace x_train with the scaled result.
# NOTE: x_train is overwritten, so re-running this cell on its own would refit
# the scaler on already-scaled data.
x_train = scaler.fit_transform(x_train)

In [11]:
# Scale the test rows with the already-fitted scaler (transform only, no refit).
# NOTE: x_test is overwritten, so re-running this cell on its own would scale it twice.
x_test = scaler.transform(x_test)

### Classification Models:

In [12]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

#### Logistic Regression:

In [13]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with default hyper-parameters, trained on the scaled features.
log_model = LogisticRegression().fit(x_train, y_train)
log_pred = log_model.predict(x_test)

# Held-out metrics, computed first and then printed in the usual order.
log_accuracy = accuracy_score(y_test, log_pred) * 100
log_confusion = confusion_matrix(y_test, log_pred)
log_report = classification_report(y_test, log_pred)

print("Accuracy: ", log_accuracy)
print("Confusion Matrix: \n", log_confusion)
print("Classification Report: \n", log_report)

Accuracy:  98.36065573770492
Confusion Matrix: 
 [[25  0]
 [ 1 35]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.96      1.00      0.98        25
           1       1.00      0.97      0.99        36

    accuracy                           0.98        61
   macro avg       0.98      0.99      0.98        61
weighted avg       0.98      0.98      0.98        61



#### SVM Classifier:

In [14]:
from sklearn.svm import SVC

# Support-vector classifier with default hyper-parameters, trained on the scaled features.
svm_model = SVC().fit(x_train, y_train)
svm_pred = svm_model.predict(x_test)

# Held-out metrics, computed first and then printed in the usual order.
svm_accuracy = accuracy_score(y_test, svm_pred) * 100
svm_confusion = confusion_matrix(y_test, svm_pred)
svm_report = classification_report(y_test, svm_pred)

print("Accuracy: ", svm_accuracy)
print("Confusion Matrix: \n", svm_confusion)
print("Classification Report: \n", svm_report)

Accuracy:  93.44262295081968
Confusion Matrix: 
 [[23  2]
 [ 2 34]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.92      0.92      0.92        25
           1       0.94      0.94      0.94        36

    accuracy                           0.93        61
   macro avg       0.93      0.93      0.93        61
weighted avg       0.93      0.93      0.93        61



#### Naive-Bayes Classifier:

In [15]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings, trained on the scaled features.
NB_model = GaussianNB().fit(x_train, y_train)
NB_pred = NB_model.predict(x_test)

# Held-out metrics, computed first and then printed in the usual order.
NB_accuracy = accuracy_score(y_test, NB_pred) * 100
NB_confusion = confusion_matrix(y_test, NB_pred)
NB_report = classification_report(y_test, NB_pred)

print("Accuracy: ", NB_accuracy)
print("Confusion Matrix: \n", NB_confusion)
print("Classification Report: \n", NB_report)

Accuracy:  93.44262295081968
Confusion Matrix: 
 [[23  2]
 [ 2 34]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.92      0.92      0.92        25
           1       0.94      0.94      0.94        36

    accuracy                           0.93        61
   macro avg       0.93      0.93      0.93        61
weighted avg       0.93      0.93      0.93        61



#### Decision Tree:

In [16]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with default hyper-parameters, trained on the scaled features.
# A fixed random_state makes the fitted tree (and therefore the reported
# metrics) repeatable across kernel restarts; without it the tie-breaking
# between equally good splits is random.
tree_model = DecisionTreeClassifier(random_state=30)
tree_model.fit(x_train, y_train)
tree_pred = tree_model.predict(x_test)

# Held-out metrics.
print("Accuracy: ", accuracy_score(y_test, tree_pred)*100)
print("Confusion Matrix: \n", confusion_matrix(y_test, tree_pred))
print("Classification Report: \n", classification_report(y_test, tree_pred))

Accuracy:  98.36065573770492
Confusion Matrix: 
 [[24  1]
 [ 0 36]]
Classification Report: 
               precision    recall  f1-score   support

           0       1.00      0.96      0.98        25
           1       0.97      1.00      0.99        36

    accuracy                           0.98        61
   macro avg       0.99      0.98      0.98        61
weighted avg       0.98      0.98      0.98        61



#### Random Forest:

In [17]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with default hyper-parameters, trained on the scaled features.
# A fixed random_state pins the bootstrap samples and per-split feature
# subsets, so the reported metrics are repeatable across kernel restarts.
rf_model = RandomForestClassifier(random_state=30)
rf_model.fit(x_train, y_train)
rf_pred = rf_model.predict(x_test)

# Held-out metrics.
print("Accuracy: ", accuracy_score(y_test, rf_pred)*100)
print("Confusion Matrix: \n", confusion_matrix(y_test, rf_pred))
print("Classification Report: \n", classification_report(y_test, rf_pred))

Accuracy:  96.72131147540983
Confusion Matrix: 
 [[25  0]
 [ 2 34]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.93      1.00      0.96        25
           1       1.00      0.94      0.97        36

    accuracy                           0.97        61
   macro avg       0.96      0.97      0.97        61
weighted avg       0.97      0.97      0.97        61



#### XGBoost:

In [18]:
import xgboost
from sklearn.metrics import ConfusionMatrixDisplay

In [19]:
from xgboost import XGBClassifier

# Gradient-boosted trees with default hyper-parameters, trained on the scaled features.
boost_model = XGBClassifier()
boost_model.fit(x_train, y_train)
boost_pred = boost_model.predict(x_test)

# Held-out metrics, computed first and then printed in the usual order.
boost_accuracy = accuracy_score(y_test, boost_pred) * 100
boost_confusion = confusion_matrix(y_test, boost_pred)
boost_report = classification_report(y_test, boost_pred)

print("Accuracy: ", boost_accuracy)
print("Confusion Matrix: \n", boost_confusion)
print("Classification Report: \n", boost_report)

Accuracy:  98.36065573770492
Confusion Matrix: 
 [[25  0]
 [ 1 35]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.96      1.00      0.98        25
           1       1.00      0.97      0.99        36

    accuracy                           0.98        61
   macro avg       0.98      0.99      0.98        61
weighted avg       0.98      0.98      0.98        61



In [20]:
# Optional plot of the XGBoost confusion matrix on the test split; left disabled.
# ConfusionMatrixDisplay.from_estimator(boost_model, x_test, y_test)

#### Results Summary

| Models     | Accuracy score  |
| ----------- | ----------- |
| XGboost classifier     | 98.36 % |
| Decision Tree Classifier  | 98.36 % |
| Random Forest Classifier  | 96.72 % |
| Logistic Regression | 98.36 % |
| Naive Bayes  | 93.44 % |
| SVM Classifier  | 93.44 %|


### Hyper-Parameter Tuning:

In [21]:
from sklearn.model_selection import GridSearchCV

In [22]:
# Disabled exhaustive grid search over XGBoost hyper-parameters (never executed).
# The grid spans 50 * 25 * 4 * 5 * 4 = 100,000 combinations, each fitted twice (cv = 2).
# NOTE(review): the learning_rate range starts at 0 and runs up to 10 - confirm
# that is intended before re-enabling this cell.
# params={
#  "learning_rate"    : (np.linspace(0,10, 50)) ,
#  "max_depth"        : (np.linspace(1,50, 25,dtype=int)),
#  "min_child_weight" : [1, 3, 5, 7],
#  "gamma"            : [0.0, 0.1, 0.2 , 0.3, 0.4],
#  "colsample_bytree" : [0.3, 0.4, 0.5 , 0.7]}

# boost_Grid = GridSearchCV(boost_model, params, cv = 2,n_jobs = -1)
# boost_Grid.fit(x_train, y_train)

In [23]:
from sklearn.model_selection import StratifiedKFold, cross_val_score

# Ten shuffled, class-stratified folds; the fixed seed keeps fold membership repeatable.
skfold = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)

In [24]:
# Mean 10-fold accuracy of the XGBoost estimator over the full, unscaled feature matrix.
# NOTE(review): boost_model is built with default hyper-parameters and the grid
# search cell is commented out, so the "Tuned" label does not reflect any tuning.
xgb_fold_scores = cross_val_score(boost_model, x, y, cv=skfold, scoring='accuracy')
cv_xgb = xgb_fold_scores.mean()
print('Tuned XGB = {:.4f}'.format(cv_xgb*100))

Tuned XGB = 98.7833


In [25]:
# Disabled exhaustive grid search over random-forest hyper-parameters (never executed).
# The grid spans 4 * 2 * 18 * 9 * 8 * 2 = 20,736 combinations, each fitted five times (cv = 5).
# NOTE(review): max_features='auto' is presumably rejected by recent scikit-learn
# releases - verify against the installed version before re-enabling this cell.
# params = {
#     "n_estimators" : [90,100,115,130],
#     'criterion': ['gini', 'entropy'],
#     'max_depth' : range(2,20,1),
#     'min_samples_leaf' : range(1,10,1),
#     'min_samples_split': range(2,10,1),
#     'max_features' : ['auto','log2']
# }
# rf_grid = GridSearchCV(rf_model, params, cv = 5, n_jobs = -1)
# rf_grid.fit(x_train, y_train).best_estimator_

### Exporting Pickle File:

In [26]:
import pickle

# Persist the fitted XGBoost classifier next to the notebook.
# The `with` block guarantees the file handle is flushed and closed even if
# pickling raises, so a partially written file is never left open.
with open('Classification.pkl', 'wb') as model_file:
    pickle.dump(boost_model, model_file)

#### Testing:

In [67]:
# Preview the loaded frame again, for reference while testing.
data.head()

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,FWI,Classes
0,29,57,18,0.0,65.7,3.4,7.6,1.3,0.5,0
1,29,61,13,1.25,64.4,4.1,7.6,1.0,0.4,0
2,26,82,22,1.25,47.7375,2.5,7.1,0.3,0.1,0
3,25,89,13,1.25,47.7375,1.3,6.9,0.0,0.0,0
4,27,77,16,0.0,64.8,3.0,14.2,1.2,0.5,0


In [68]:
# Preview the feature matrix again; a row from it is used as the test sample below.
x.head()

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI
0,29,57,18,0.0,65.7,3.4,7.6,1.3
1,29,61,13,1.25,64.4,4.1,7.6,1.0
2,26,82,22,1.25,47.7375,2.5,7.1,0.3
3,25,89,13,1.25,47.7375,1.3,6.9,0.0
4,27,77,16,0.0,64.8,3.0,14.2,1.2


In [69]:
# Reload the persisted artefacts. Each file is opened in its own `with` block so
# the handle is closed as soon as unpickling finishes.
# NOTE(review): 'Regression.pkl' and 'Scaler_Transformed.pkl' are not written by
# any visible cell, so they must already exist in the working directory.
# Unpickling executes arbitrary code - only load files from a trusted source.
with open('Regression.pkl', 'rb') as reg_file:
    reg = pickle.load(reg_file)
with open('Classification.pkl', 'rb') as clas_file:
    clas = pickle.load(clas_file)
with open('Scaler_Transformed.pkl', 'rb') as scale_file:
    scale = pickle.load(scale_file)

In [70]:
# Take the row at position 1 of the feature matrix as a plain list for a manual check.
ls = [feature_value for feature_value in x.iloc[1, :]]
ls

[29.0, 61.0, 13.0, 1.25, 64.4, 4.1, 7.6, 1.0]

In [71]:
# Wrap the sample in an outer list so the array is 2-D with a single row.
values = np.array([ls])
values

array([[29.  , 61.  , 13.  ,  1.25, 64.4 ,  4.1 ,  7.6 ,  1.  ]])

In [72]:
# Apply the loaded scaler to the single-row sample.
# NOTE: `values` is overwritten, so re-running this cell on its own would scale the sample twice.
values = scale.transform(values)

In [73]:
# Predict with the loaded regression model on the scaled sample.
# NOTE(review): the classifier loaded as `clas` is never called in the visible cells.
reg.predict(values)

array([0.4])