In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import OneHotEncoder,PowerTransformer, StandardScaler, MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

import pickle
import warnings
warnings.filterwarnings(action='ignore')

In [2]:
df = pd.read_csv('forest_outliers_removed1')
df.drop(columns=['Unnamed: 0'], inplace=True)
df['classes']=df['classes'].apply(lambda x :1 if x == 'fire' else 0)

In [3]:
X = df.drop(columns=['classes','year'],axis=1)
y= df['classes']

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)
print(X_train.shape,y_train.shape,X_test.shape,y_test.shape)

(183, 12) (183,) (61, 12) (61,)


In [5]:
#Using LogisticRegression
lr = LogisticRegression()  
pipe = Pipeline([
    ('lr', lr)
    ])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
accuracy_score(y_test, y_pred)

0.9672131147540983

In [6]:
#Confusion matrix
confusion_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n",confusion_mat)
print(classification_report(y_test, y_pred))

Confusion matrix: 
 [[28  1]
 [ 1 31]]
              precision    recall  f1-score   support

           0       0.97      0.97      0.97        29
           1       0.97      0.97      0.97        32

    accuracy                           0.97        61
   macro avg       0.97      0.97      0.97        61
weighted avg       0.97      0.97      0.97        61



In [7]:
#Using Support Vector
svc = SVC()  
pipe = Pipeline([
    ('svc', svc)
    ])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
accuracy_score(y_test, y_pred)

0.9016393442622951

In [8]:
#Confusion matrix
confusion_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n",confusion_mat)
print(classification_report(y_test, y_pred))

Confusion matrix: 
 [[24  5]
 [ 1 31]]
              precision    recall  f1-score   support

           0       0.96      0.83      0.89        29
           1       0.86      0.97      0.91        32

    accuracy                           0.90        61
   macro avg       0.91      0.90      0.90        61
weighted avg       0.91      0.90      0.90        61



In [9]:
#Using GaussianNB
gaussian = GaussianNB()
pipe = Pipeline([
    ('nb', gaussian)
    ])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
accuracy_score(y_test, y_pred)

0.9672131147540983

In [10]:
#Confusion matrix
confusion_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n",confusion_mat)
print(classification_report(y_test, y_pred))

Confusion matrix: 
 [[28  1]
 [ 1 31]]
              precision    recall  f1-score   support

           0       0.97      0.97      0.97        29
           1       0.97      0.97      0.97        32

    accuracy                           0.97        61
   macro avg       0.97      0.97      0.97        61
weighted avg       0.97      0.97      0.97        61



In [11]:
#Using KNN Neighbors
knn = KNeighborsClassifier()  
pipe = Pipeline([
    ('svc', knn)
    ])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
accuracy_score(y_test, y_pred)

0.9016393442622951

In [12]:
#Confusion matrix
confusion_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n",confusion_mat)
print(classification_report(y_test, y_pred))

Confusion matrix: 
 [[24  5]
 [ 1 31]]
              precision    recall  f1-score   support

           0       0.96      0.83      0.89        29
           1       0.86      0.97      0.91        32

    accuracy                           0.90        61
   macro avg       0.91      0.90      0.90        61
weighted avg       0.91      0.90      0.90        61



In [13]:
#Using Decision Tree
tree = DecisionTreeClassifier()
pipe = Pipeline([
    ('tree', tree)
    ])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
accuracy_score(y_test, y_pred)

0.9836065573770492

In [14]:
#Confusion matrix
confusion_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n",confusion_mat)
print(classification_report(y_test, y_pred))

Confusion matrix: 
 [[28  1]
 [ 0 32]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.98        29
           1       0.97      1.00      0.98        32

    accuracy                           0.98        61
   macro avg       0.98      0.98      0.98        61
weighted avg       0.98      0.98      0.98        61



In [15]:
#Using Random Forest Tree
rf = RandomForestClassifier()
pipe = Pipeline([
    ('rf', rf)
    ])
pipe.fit(X_train.values, y_train.values)
y_pred = pipe.predict(X_test.values)
accuracy_score(y_test, y_pred)


0.9836065573770492

In [16]:
#Confusion matrix
confusion_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n",confusion_mat)
print(classification_report(y_test, y_pred))

Confusion matrix: 
 [[28  1]
 [ 0 32]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.98        29
           1       0.97      1.00      0.98        32

    accuracy                           0.98        61
   macro avg       0.98      0.98      0.98        61
weighted avg       0.98      0.98      0.98        61



In [17]:
y_train.values

array([1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
       0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
       0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 0, 1, 0, 1, 0, 1], dtype=int64)

In [18]:
results = pd.DataFrame({
    'Model': ['Logistic Regression','Support Vector Machines', 'Naive Bayes','KNN' ,'Decision Tree','Random Forest'],
    'Score': [0.93,0.85,0.81,0.93,0.97,accuracy_score(y_test, y_pred)]})

result_df = results.sort_values(by='Score', ascending=False)
result_df = result_df.set_index('Score')
result_df.head(9)

Unnamed: 0_level_0,Model
Score,Unnamed: 1_level_1
0.983607,Random Forest
0.97,Decision Tree
0.93,Logistic Regression
0.93,KNN
0.85,Support Vector Machines
0.81,Naive Bayes


In [19]:
import json
result = X_test.to_json(orient="records")
parsed = json.loads(result)

In [20]:
### Creating pickle file
pickle.dump(pipe,open('models/pipe_class2.pkl','wb'))

In [21]:
 ### Preparing data to create batch prediction
import pymongo
client = pymongo.MongoClient("mongodb+srv://mongodb:mongodb@cluster0.oxgpt.mongodb.net/myFirstDatabase?retryWrites=true&w=majority")

In [22]:
db = client.batch_data
print(db)

Database(MongoClient(host=['cluster0-shard-00-02.oxgpt.mongodb.net:27017', 'cluster0-shard-00-01.oxgpt.mongodb.net:27017', 'cluster0-shard-00-00.oxgpt.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, retrywrites=True, w='majority', authsource='admin', replicaset='atlas-th8ou3-shard-0', tls=True), 'batch_data')


In [23]:
coll = db['classification_batch']

In [24]:
dict_test = {'day': 24, 'month': 6, 'RH': 68, 'Ws': 16, 'Rain': 0, 'FFMC': 85, 'DMC': 10, 'DC': 17,
                 'ISI': 4, 'BUI': 9, 'FWI': 5.3, 'classes': 0}

In [25]:
db.list_collection_names()

['classification_batch', 'regression_batch_data', 'regression_batch']

In [26]:
coll.insert_many(parsed)

<pymongo.results.InsertManyResult at 0x1bd5f990480>

In [27]:
# Testing created Pipe

In [28]:
pickle_model = pickle.load(open('models/pipe_class2.pkl','rb'))

In [29]:
dict_test ={'day':1, 'month':6, 'Temperature':26,'RH':57, 'Ws':18.0, 'Rain':0.00, 'FFMC':65.7000,
       'DMC':3.4, 'DC':7.6, 'ISI':1.3, 'BUI':3.4, 'FWI':0.5}

In [30]:
list(dict_test.values())

[1, 6, 26, 57, 18.0, 0.0, 65.7, 3.4, 7.6, 1.3, 3.4, 0.5]

In [31]:
input = np.array(list(dict_test.values())).reshape(1,12)

In [32]:
pipe.predict(input)[0]

0