In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [3]:
import warnings
import pickle
# Install a global "ignore" filter: every Python warning raised after this
# point is suppressed for the rest of the session.
# NOTE(review): this is a blanket filter (no category/module restriction), so
# warnings coming from pandas or scikit-learn calls below will not be shown
# either — consider narrowing it once the noisy warning is identified.
warnings.filterwarnings("ignore")

In [4]:
# Read the raw CSV once and keep it untouched in `data`.
data = pd.read_csv("dataset-main.csv")
# read_csv already returns a DataFrame, so no constructor call is needed.
# Work on an explicit copy so later edits to `df` can never reach `data`.
df = data.copy()

In [5]:
# Display the freshly loaded frame to inspect its columns before cleaning.
df

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,class
0,7.0,5.0,apr,sun,81.9,3.0,7.90,3.5,13.4,75.0,1.8,0.0,0.00
1,6.0,3.0,apr,wed,88.0,17.2,43.50,3.8,15.2,51.0,2.7,0.0,0.00
2,4.0,4.0,apr,fri,83.0,23.3,85.30,2.3,16.7,20.0,3.1,0.0,0.00
3,7.0,4.0,aug,sun,91.8,175.1,700.70,13.8,21.9,73.0,7.6,1.0,0.00
4,8.0,6.0,aug,wed,93.1,157.3,666.70,13.5,28.7,28.0,2.7,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
618,1.0,2.0,sep,tue,91.0,129.5,692.60,7.0,18.8,40.0,2.2,0.0,212.88
619,7.0,4.0,jul,mon,89.2,103.9,431.60,6.4,22.6,57.0,4.9,0.0,278.53
620,8.0,6.0,aug,thu,94.8,222.4,698.60,13.9,27.5,27.0,4.9,0.0,746.28
621,6.0,5.0,sep,sat,92.5,121.1,674.40,8.6,25.1,27.0,4.0,0.0,1090.84


In [6]:
# Remove the columns that are not used as features below.
# Derive the result from the untouched raw frame (`data`) instead of dropping
# in place on `df`, so re-running this cell gives the same frame rather than
# raising KeyError because the columns are already gone.
df = data.drop(columns=['X', 'Y', 'month', 'day'])

In [7]:
# Display the frame again to confirm X, Y, month and day are gone.
df

Unnamed: 0,FFMC,DMC,DC,ISI,temp,RH,wind,rain,class
0,81.9,3.0,7.90,3.5,13.4,75.0,1.8,0.0,0.00
1,88.0,17.2,43.50,3.8,15.2,51.0,2.7,0.0,0.00
2,83.0,23.3,85.30,2.3,16.7,20.0,3.1,0.0,0.00
3,91.8,175.1,700.70,13.8,21.9,73.0,7.6,1.0,0.00
4,93.1,157.3,666.70,13.5,28.7,28.0,2.7,0.0,0.00
...,...,...,...,...,...,...,...,...,...
618,91.0,129.5,692.60,7.0,18.8,40.0,2.2,0.0,212.88
619,89.2,103.9,431.60,6.4,22.6,57.0,4.9,0.0,278.53
620,94.8,222.4,698.60,13.9,27.5,27.0,4.9,0.0,746.28
621,92.5,121.1,674.40,8.6,25.1,27.0,4.0,0.0,1090.84


In [8]:
# Total number of missing cells in the frame.
# `.values.any().sum()` sums a single boolean, so it can only ever be 0 or 1;
# summing the per-column null counts gives the real number of missing values.
int(df.isnull().sum().sum())

1

In [9]:
# Names of the columns that contain at least one missing value, in column order.
has_missing = df.isnull().any()
nan_cols = df.columns[has_missing].tolist()
nan_cols

['class']

In [10]:
# Fill each affected column with its *own* mean, so one column's mean never
# leaks into another column. The loop is simply a no-op when nothing is
# missing (no IndexError on an empty nan_cols).
for col in nan_cols:
    df[col] = df[col].fillna(df[col].mean())
# NOTE(review): 'class' is the label column and is binarized with `> 0`
# further down, so mean-filled rows become positive labels whenever the mean
# is above zero. Consider dropping unlabeled rows instead — confirm intent.

# Number of missing cells left after imputation (expected: 0).
int(df.isnull().sum().sum())

0

In [11]:
# Make sure the DC column is stored as float; all other columns keep their dtype.
df = df.astype({'DC': float})
df

Unnamed: 0,FFMC,DMC,DC,ISI,temp,RH,wind,rain,class
0,81.9,3.0,7.90,3.5,13.4,75.0,1.8,0.0,0.000000
1,88.0,17.2,43.50,3.8,15.2,51.0,2.7,0.0,0.000000
2,83.0,23.3,85.30,2.3,16.7,20.0,3.1,0.0,0.000000
3,91.8,175.1,700.70,13.8,21.9,73.0,7.6,1.0,0.000000
4,93.1,157.3,666.70,13.5,28.7,28.0,2.7,0.0,0.000000
...,...,...,...,...,...,...,...,...,...
618,91.0,129.5,692.60,7.0,18.8,40.0,2.2,0.0,212.880000
619,89.2,103.9,431.60,6.4,22.6,57.0,4.9,0.0,278.530000
620,94.8,222.4,698.60,13.9,27.5,27.0,4.9,0.0,746.280000
621,92.5,121.1,674.40,8.6,25.1,27.0,4.0,0.0,1090.840000


In [12]:
# Shuffle all rows and renumber the index from 0.
# A fixed seed keeps the row order reproducible across runs, so the
# train/test split, the reported accuracy and the exported CSV stay the same
# on Restart & Run All.
df = df.sample(frac=1, random_state=40).reset_index(drop=True)
df.shape

(623, 9)

In [13]:
# Turn the target into a binary label: every value above zero becomes 1,
# everything else is left unchanged.
is_positive = df['class'] > 0
df['class'] = df['class'].mask(is_positive, 1)

In [14]:
# Feature matrix: the first eight columns of the frame as a NumPy array.
feature_frame = df.iloc[:, :8]
x = feature_frame.to_numpy()
x

array([[9.010e+01, 5.120e+01, 4.241e+02, ..., 4.300e+01, 1.800e+00,
        0.000e+00],
       [7.970e+01, 4.300e+00, 1.520e+01, ..., 5.400e+01, 1.800e+01,
        1.000e-01],
       [8.490e+01, 1.820e+01, 5.500e+01, ..., 7.000e+01, 4.500e+00,
        0.000e+00],
       ...,
       [9.450e+01, 1.394e+02, 6.891e+02, ..., 2.900e+01, 4.900e+00,
        0.000e+00],
       [9.380e+01, 2.300e+01, 4.270e+01, ..., 3.500e+01, 1.500e+01,
        0.000e+00],
       [9.060e+01, 1.820e+01, 3.050e+01, ..., 4.200e+01, 2.100e+01,
        0.000e+00]])

In [15]:
# Target vector: the last column of the frame, converted to integers.
label_column = df.iloc[:, -1]
y = np.asarray(label_column).astype(int)
y

array([1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1,
       0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1,
       0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0,

In [44]:
# Save the cleaned, shuffled frame.
# index=False keeps the RangeIndex out of the file; otherwise it is written
# as an extra unnamed first column, which comes back as "Unnamed: 0" on
# read_csv and shifts positional slices such as iloc[:, 0:8].
df.to_csv('fire-dataset.csv', index=False)

In [38]:
# Hold out 30% of the rows for evaluation; the seed fixes the split.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.30,
    random_state=40,
)
# Report the shape of each feature partition.
for title, features in (('Train Data', x_train), ('Test Data', x_test)):
    print(title)
    print(features.shape)

Train Data
(436, 8)
Test Data
(187, 8)


In [39]:
# Per-class weights passed to the classifier: class 1 is weighted 17, class 0 is 9.
class_weights = {0: 9.0, 1: 17}
# Logistic regression capped at 600 solver iterations.
lr = LogisticRegression(class_weight=class_weights, max_iter=600)

In [40]:
# Fit the classifier on the training partition; the fitted estimator is
# echoed as the cell output.
lr.fit(x_train, y_train)

LogisticRegression(class_weight={0: 9.0, 1: 17}, max_iter=600)

In [41]:
# Predict class labels for the held-out test features.
pred = lr.predict(x_test)
# Echo the predicted labels.
pred

array([1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1])

In [42]:
from sklearn import metrics
# accuracy_score is declared as (y_true, y_pred): pass the ground truth first.
# Plain accuracy is symmetric, so the number is unchanged, but the documented
# order keeps this call correct if it is swapped for an asymmetric metric.
# Result is scaled to a percentage.
print("Accuracy: ", metrics.accuracy_score(y_test, pred)*100)

Acuraccy:  85.56149732620321


In [43]:
# Persist the fitted model under both file names.
# The context manager closes (and flushes) each file handle deterministically,
# instead of leaving it to garbage collection.
for model_path in ('Firemodel.sav', 'Firemodel.pkl'):
    with open(model_path, 'wb') as model_file:
        pickle.dump(lr, model_file)