In [90]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [91]:
data = pd.read_csv('Datasets/SkewOutliersMulticorr')

In [92]:
data.head()
# data.shape

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,FWI,Classes
0,29.0,57,18,0.0,65.7,3.4,7.6,1.3,0.5,0
1,29.0,61,13,1.25,64.4,4.1,7.6,1.0,0.4,0
2,26.0,82,22,1.25,47.7375,2.5,7.1,0.3,0.1,0
3,25.0,89,13,1.25,47.7375,1.3,6.9,0.0,0.0,0
4,27.0,77,16,0.0,64.8,3.0,14.2,1.2,0.5,0


#### Taking x and y values:

In [93]:
x = data.drop(['FWI','Classes'], axis=1)

In [94]:
x.head()

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI
0,29.0,57,18,0.0,65.7,3.4,7.6,1.3
1,29.0,61,13,1.25,64.4,4.1,7.6,1.0
2,26.0,82,22,1.25,47.7375,2.5,7.1,0.3
3,25.0,89,13,1.25,47.7375,1.3,6.9,0.0
4,27.0,77,16,0.0,64.8,3.0,14.2,1.2


In [95]:
y = data['Classes']

#### Train-Test split():

In [96]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.3, random_state=30)

In [97]:
x_train.shape

(170, 8)

#### Feature Scaling:

In [98]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [99]:
x_train = scaler.fit_transform(x_train)

In [100]:
x_test = scaler.transform(x_test)

### Classification Models:

In [101]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

#### Logistic Regression:

In [102]:
from sklearn.linear_model import LogisticRegression
log_model = LogisticRegression()
log_model.fit(x_train, y_train)
log_pred = log_model.predict(x_test)

print("Accuracy: ", accuracy_score(y_test, log_pred)*100)
print("Confusion Matrix: \n", confusion_matrix(y_test, log_pred))
print("Classification Report: \n", classification_report(y_test, log_pred))

Accuracy:  93.24324324324324
Confusion Matrix: 
 [[29  1]
 [ 4 40]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.88      0.97      0.92        30
           1       0.98      0.91      0.94        44

    accuracy                           0.93        74
   macro avg       0.93      0.94      0.93        74
weighted avg       0.94      0.93      0.93        74



#### SVM Classifier:

In [103]:
from sklearn.svm import SVC
svm_model = SVC()
svm_model.fit(x_train, y_train)
svm_pred = svm_model.predict(x_test)

print("Accuracy: ", accuracy_score(y_test, svm_pred)*100)
print("Confusion Matrix: \n", confusion_matrix(y_test, svm_pred))
print("Classification Report: \n", classification_report(y_test, svm_pred))

Accuracy:  89.1891891891892
Confusion Matrix: 
 [[27  3]
 [ 5 39]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.84      0.90      0.87        30
           1       0.93      0.89      0.91        44

    accuracy                           0.89        74
   macro avg       0.89      0.89      0.89        74
weighted avg       0.89      0.89      0.89        74



#### Naive-Bayes Classifier:

In [104]:
from sklearn.naive_bayes import GaussianNB
NB_model = GaussianNB()
NB_model.fit(x_train, y_train)
NB_pred = NB_model.predict(x_test)

print("Accuracy: ", accuracy_score(y_test, NB_pred)*100)
print("Confusion Matrix: \n", confusion_matrix(y_test, NB_pred))
print("Classification Report: \n", classification_report(y_test, NB_pred))

Accuracy:  91.8918918918919
Confusion Matrix: 
 [[28  2]
 [ 4 40]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.88      0.93      0.90        30
           1       0.95      0.91      0.93        44

    accuracy                           0.92        74
   macro avg       0.91      0.92      0.92        74
weighted avg       0.92      0.92      0.92        74



#### Decision Tree:

In [105]:
from sklearn.tree import DecisionTreeClassifier
tree_model = DecisionTreeClassifier()
tree_model.fit(x_train, y_train)
tree_pred = tree_model.predict(x_test)

print("Accuracy: ", accuracy_score(y_test, tree_pred)*100)
print("Confusion Matrix: \n", confusion_matrix(y_test, tree_pred))
print("Classification Report: \n", classification_report(y_test, tree_pred))

Accuracy:  95.94594594594594
Confusion Matrix: 
 [[29  1]
 [ 2 42]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.94      0.97      0.95        30
           1       0.98      0.95      0.97        44

    accuracy                           0.96        74
   macro avg       0.96      0.96      0.96        74
weighted avg       0.96      0.96      0.96        74



#### Random Forest:

In [106]:
from sklearn.ensemble import RandomForestClassifier
rf_model = RandomForestClassifier()
rf_model.fit(x_train, y_train)
rf_pred = rf_model.predict(x_test)

print("Accuracy: ", accuracy_score(y_test, rf_pred)*100)
print("Confusion Matrix: \n", confusion_matrix(y_test, rf_pred))
print("Classification Report: \n", classification_report(y_test, rf_pred))

Accuracy:  97.2972972972973
Confusion Matrix: 
 [[30  0]
 [ 2 42]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.94      1.00      0.97        30
           1       1.00      0.95      0.98        44

    accuracy                           0.97        74
   macro avg       0.97      0.98      0.97        74
weighted avg       0.97      0.97      0.97        74



#### Xg Boost:

In [107]:
import xgboost
from sklearn.metrics import ConfusionMatrixDisplay

In [108]:
from xgboost import XGBClassifier
boost_model = XGBClassifier()
boost_model.fit(x_train, y_train)
boost_pred = boost_model.predict(x_test)

print("Accuracy: ", accuracy_score(y_test, boost_pred)*100)
print("Confusion Matrix: \n", confusion_matrix(y_test, boost_pred))
print("Classification Report: \n", classification_report(y_test, boost_pred))

Accuracy:  97.2972972972973
Confusion Matrix: 
 [[30  0]
 [ 2 42]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.94      1.00      0.97        30
           1       1.00      0.95      0.98        44

    accuracy                           0.97        74
   macro avg       0.97      0.98      0.97        74
weighted avg       0.97      0.97      0.97        74



In [109]:
# ConfusionMatrixDisplay.from_estimator(boost_model, x_test, y_test)

#### Results Summary

| Models     | Accuracy score  |
| ----------- | ----------- |
| Random Forest Classifier  | 97.29 % |
| XGboost classifier     | 97.29 % |
| Decision Tree Classifier  | 95.94 % |
| Logistic Regression | 93.24 % |
| Naive Bayes  | 91.89 % |
| SVM Classifier  | 89.18 %|


### Hyper-Parameter Tuning:

In [110]:
from sklearn.model_selection import GridSearchCV

In [111]:
# params={
#  "learning_rate"    : (np.linspace(0,10, 50)) ,
#  "max_depth"        : (np.linspace(1,50, 25,dtype=int)),
#  "min_child_weight" : [1, 3, 5, 7],
#  "gamma"            : [0.0, 0.1, 0.2 , 0.3, 0.4],
#  "colsample_bytree" : [0.3, 0.4, 0.5 , 0.7]}

# boost_Grid = GridSearchCV(boost_model, params, cv = 2,n_jobs = -1)
# boost_Grid.fit(x_train, y_train)

In [112]:
# params = {
#     "n_estimators" : [90,100,115,130],
#     'criterion': ['gini', 'entropy'],
#     'max_depth' : range(2,20,1),
#     'min_samples_leaf' : range(1,10,1),
#     'min_samples_split': range(2,10,1),
#     'max_features' : ['auto','log2']
# }
# rf = RandomForestClassifier()
# rf_grid = GridSearchCV(rf, params, cv = 2, n_jobs = -1)
# rf_grid.fit(x_train, y_train)

In [113]:
# best_grid_model = rf_grid.best_parameter_
# y_pred = best_grid_model.predict(x_test)

In [114]:
from sklearn.model_selection import StratifiedKFold, cross_val_score
skfold = StratifiedKFold(n_splits= 10,shuffle= True, random_state= 42)

In [116]:
rf = RandomForestClassifier()
cv_rf= cross_val_score(rf, x,y, cv=skfold, scoring='accuracy').mean()
print('RF Model = {:.4f}'.format(cv_rf*100))

RF Model = 98.3833


#### Model Deployment:

In [119]:
rf_tuned_model = RandomForestClassifier(criterion='entropy', max_depth=18, max_features='log2',
                                     min_samples_split=4, n_estimators=115)

In [120]:
rf_tuned_model.fit(x_train, y_train)

RandomForestClassifier(criterion='entropy', max_depth=18, max_features='log2',
                       min_samples_split=4, n_estimators=115)

In [121]:
ypred = rf_tuned_model.predict(x_test)

In [122]:
print("Accuracy: ", accuracy_score(y_test, boost_pred)*100)
print("Confusion Matrix: \n", confusion_matrix(y_test, boost_pred))
print("Classification Report: \n", classification_report(y_test, boost_pred))

Accuracy:  97.2972972972973
Confusion Matrix: 
 [[30  0]
 [ 2 42]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.94      1.00      0.97        30
           1       1.00      0.95      0.98        44

    accuracy                           0.97        74
   macro avg       0.97      0.98      0.97        74
weighted avg       0.97      0.97      0.97        74



### Exporting Pickel File;

In [124]:
import pickle
pickle.dump(rf_tuned_model, open('Classification.pkl', 'wb'))

#### Testing:

In [125]:
data.head()

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,FWI,Classes
0,29.0,57,18,0.0,65.7,3.4,7.6,1.3,0.5,0
1,29.0,61,13,1.25,64.4,4.1,7.6,1.0,0.4,0
2,26.0,82,22,1.25,47.7375,2.5,7.1,0.3,0.1,0
3,25.0,89,13,1.25,47.7375,1.3,6.9,0.0,0.0,0
4,27.0,77,16,0.0,64.8,3.0,14.2,1.2,0.5,0


In [126]:
x.head()

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI
0,29.0,57,18,0.0,65.7,3.4,7.6,1.3
1,29.0,61,13,1.25,64.4,4.1,7.6,1.0
2,26.0,82,22,1.25,47.7375,2.5,7.1,0.3
3,25.0,89,13,1.25,47.7375,1.3,6.9,0.0
4,27.0,77,16,0.0,64.8,3.0,14.2,1.2


In [127]:
reg = pickle.load(open('Regression.pkl', 'rb'))
clas = pickle.load(open('Classification.pkl', 'rb'))
scale = pickle.load(open('Features_Scaled.pkl', 'rb'))

In [129]:
ls = list(x.iloc[4,:])
ls

[27.0, 77.0, 16.0, 0.0, 64.8, 3.0, 14.2, 1.2]

In [130]:
values = np.array(ls).reshape(1,-1)
values

array([[27. , 77. , 16. ,  0. , 64.8,  3. , 14.2,  1.2]])

In [131]:
values = scale.transform(values)

In [132]:
reg.predict(values)

array([0.5])