#PREDICT THE BURNED AREA OF FOREST FIRES WITH NEURAL NETWORKS


In [19]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Layer, Lambda

In [20]:
# Read the forest-fires dataset from the working directory and display it.
df = pd.read_csv(filepath_or_buffer = "forestfires.csv")
df

Unnamed: 0,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,...,monthfeb,monthjan,monthjul,monthjun,monthmar,monthmay,monthnov,monthoct,monthsep,size_category
0,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,...,0,0,0,0,1,0,0,0,0,small
1,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,...,0,0,0,0,0,0,0,1,0,small
2,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,...,0,0,0,0,0,0,0,1,0,small
3,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,...,0,0,0,0,1,0,0,0,0,small
4,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,...,0,0,0,0,1,0,0,0,0,small
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,...,0,0,0,0,0,0,0,0,0,large
513,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,...,0,0,0,0,0,0,0,0,0,large
514,aug,sun,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,...,0,0,0,0,0,0,0,0,0,large
515,aug,sat,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,...,0,0,0,0,0,0,0,0,0,small


In [21]:
# Dummy variables for month and day already exist, so the raw text columns are redundant.
# A non-inplace drop with errors="ignore" keeps this cell safe to re-run: the
# in-place version raised KeyError once the columns were already gone.
df = df.drop(columns = ["month", "day"], errors = "ignore")

In [22]:
# Binarize the target: 1 when the burned area exceeds 50, otherwise 0.
# The guard makes the cell safe to re-run: thresholding the already-binary
# column a second time would silently turn every label into 0.
if not df["area"].isin([0, 1]).all():
    df["area"] = np.where(df["area"] > 50, 1, 0)

In [23]:
# Class balance of the binarized target.
df.area.value_counts()

0    493
1     24
Name: area, dtype: int64

In [24]:
# Number of missing values in each column.
df.isna().sum()

FFMC             0
DMC              0
DC               0
ISI              0
temp             0
RH               0
wind             0
rain             0
area             0
dayfri           0
daymon           0
daysat           0
daysun           0
daythu           0
daytue           0
daywed           0
monthapr         0
monthaug         0
monthdec         0
monthfeb         0
monthjan         0
monthjul         0
monthjun         0
monthmar         0
monthmay         0
monthnov         0
monthoct         0
monthsep         0
size_category    0
dtype: int64

In [25]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,dayfri,...,monthdec,monthfeb,monthjan,monthjul,monthjun,monthmar,monthmay,monthnov,monthoct,monthsep
count,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,...,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0
mean,90.644681,110.87234,547.940039,9.021663,18.889168,44.288201,4.017602,0.021663,0.046422,0.16441,...,0.017408,0.038685,0.003868,0.061896,0.032882,0.104449,0.003868,0.001934,0.029014,0.332689
std,5.520111,64.046482,248.066192,4.559477,5.806625,16.317469,1.791653,0.295959,0.2106,0.371006,...,0.130913,0.193029,0.062137,0.241199,0.1785,0.306138,0.062137,0.04398,0.168007,0.471632
min,18.7,1.1,7.9,0.0,2.2,15.0,0.4,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,90.2,68.6,437.7,6.5,15.5,33.0,2.7,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,91.6,108.3,664.2,8.4,19.3,42.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,92.9,142.4,713.9,10.8,22.8,53.0,4.9,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
max,96.2,291.3,860.6,56.1,33.3,100.0,9.4,6.4,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [26]:
#Normalization being done.
def norm_func(i):
    """Min-max scale ``i`` into the range [0, 1].

    Parameters
    ----------
    i : pandas.DataFrame, pandas.Series or array-like with ``min``/``max``
        Values to scale. A DataFrame is scaled column by column.

    Returns
    -------
    Same kind of object as ``i`` with ``(i - min) / (max - min)`` applied.
    A constant input (``max == min``) maps to 0 instead of NaN.
    """
    lowest = i.min()
    span = i.max() - lowest
    # A constant column has a zero range; dividing by it would give 0/0 = NaN.
    # Using 1 as the divisor leaves the (all-zero) numerator unchanged.
    if isinstance(span, pd.Series):
        span = span.replace(0, 1)
    elif span == 0:
        span = 1
    return (i - lowest) / span

In [27]:
# Select the features and the target by name rather than by position, so the
# selection does not silently shift if columns are added, removed or reordered.
feature_columns = ["FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain"]
predictors = df[feature_columns]
target = df["area"]

# Min-max scale the features.
# NOTE(review): the min/max are computed over the full dataset, i.e. before the
# train/test split, so test rows influence the scaling of the training rows.
predictors1 = norm_func(predictors)

In [28]:
from sklearn.model_selection import train_test_split

# Stratify on the target so both splits keep the rare positive class.
# A fixed random_state makes the split, and every metric computed from it,
# repeatable across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    predictors1, target, test_size = 0.2, stratify = target, random_state = 42
)

In [29]:
def prep_model(hidden_dim):
    """Build and compile a fully connected binary classifier.

    ``hidden_dim`` lists the layer sizes: the first entry is the number of
    input features, the last entry is the size of the sigmoid output layer,
    and every entry in between becomes a ReLU ``Dense`` layer.

    Returns the compiled ``Sequential`` model (binary cross-entropy loss,
    RMSprop optimizer, accuracy metric).
    """
    network = Sequential()
    for position, units in enumerate(hidden_dim[1:-1]):
        layer_options = {"activation": "relu"}
        # Only the first hidden layer is told the input width.
        if position == 0:
            layer_options["input_dim"] = hidden_dim[0]
        network.add(Dense(units, **layer_options))
    output_layer = Dense(hidden_dim[-1], kernel_initializer = "normal", activation = "sigmoid")
    network.add(output_layer)
    network.compile(loss = "binary_crossentropy", optimizer = "rmsprop", metrics = ["accuracy"])
    return network

In [30]:
# Layer sizes: 8 input features, hidden layers of 50, 40 and 20 units, 1 output unit.
first_model = prep_model(hidden_dim = [8, 50, 40, 20, 1])
first_model.fit(x = np.array(x_train), y = np.array(y_train), epochs = 750)
# Predicted probabilities for the rows the model was trained on.
pred_train = first_model.predict(x = np.array(x_train))

Epoch 1/750
Epoch 2/750
Epoch 3/750
Epoch 4/750
Epoch 5/750
Epoch 6/750
Epoch 7/750
Epoch 8/750
Epoch 9/750
Epoch 10/750
Epoch 11/750
Epoch 12/750
Epoch 13/750
Epoch 14/750
Epoch 15/750
Epoch 16/750
Epoch 17/750
Epoch 18/750
Epoch 19/750
Epoch 20/750
Epoch 21/750
Epoch 22/750
Epoch 23/750
Epoch 24/750
Epoch 25/750
Epoch 26/750
Epoch 27/750
Epoch 28/750
Epoch 29/750
Epoch 30/750
Epoch 31/750
Epoch 32/750
Epoch 33/750
Epoch 34/750
Epoch 35/750
Epoch 36/750
Epoch 37/750
Epoch 38/750
Epoch 39/750
Epoch 40/750
Epoch 41/750
Epoch 42/750
Epoch 43/750
Epoch 44/750
Epoch 45/750
Epoch 46/750
Epoch 47/750
Epoch 48/750
Epoch 49/750
Epoch 50/750
Epoch 51/750
Epoch 52/750
Epoch 53/750
Epoch 54/750
Epoch 55/750
Epoch 56/750
Epoch 57/750
Epoch 58/750
Epoch 59/750
Epoch 60/750
Epoch 61/750
Epoch 62/750
Epoch 63/750
Epoch 64/750
Epoch 65/750
Epoch 66/750
Epoch 67/750
Epoch 68/750
Epoch 69/750
Epoch 70/750
Epoch 71/750
Epoch 72/750
Epoch 73/750
Epoch 74/750
Epoch 75/750
Epoch 76/750
Epoch 77/750
Epoch 78

In [31]:
# Flatten the model output (one value per row) into a Series. np.ravel also
# accepts an already-flat Series, so the cell can be re-run without error.
pred_train = pd.Series(np.ravel(pred_train))

size = ["small", "large"]
# Derive the labels from the predictions themselves instead of a hard-coded
# row count (previously 413), so any split size works.
pred_train_class = pd.Series(np.where(pred_train > 0.5, size[1], size[0]))

train = pd.concat([x_train, y_train], axis = 1)
train["area"].value_counts()

0    394
1     19
Name: area, dtype: int64

In [32]:
# Checking the predictions against the actual classes for the training data
from sklearn.metrics import confusion_matrix

train["original_class"] = np.where(train["area"] == 1, "large", "small")
# Drop the shuffled row index so the labels line up positionally with the predictions.
actual_train_class = train["original_class"].reset_index(drop = True)

# confusion_matrix takes (y_true, y_pred): rows are actual classes, columns are predictions.
train_confusion = confusion_matrix(y_true = actual_train_class, y_pred = pred_train_class)
train_accuracy = np.mean(pred_train_class == actual_train_class)
print(f"Training accuracy: {train_accuracy:.2%}")

# Rows: predicted class, columns: actual class.
pd.crosstab(pred_train_class, actual_train_class)


original_class,large,small
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
large,15,1
small,4,393


In [33]:
#For test data
# Flatten the model output (one value per row) into a Series of probabilities.
pred_test = pd.Series(np.ravel(first_model.predict(np.array(x_test))))
# Derive the labels from the predictions themselves instead of a hard-coded
# row count (previously 104), so any split size works.
pred_test_class = pd.Series(np.where(pred_test > 0.5, "large", "small"))
test = pd.concat([x_test, y_test], axis = 1)
test["original_class"] = np.where(test["area"] == 1, "large", "small")




In [34]:

# Class balance of the actual labels in the test split.
test.original_class.value_counts()

small    99
large     5
Name: original_class, dtype: int64

In [35]:
np.mean(pred_test_class==pd.Series(test["original_class"]).reset_index(drop = True)) # fraction of test rows classified correctly; the exact value changes from run to run

0.9326923076923077

In [36]:
# Drop the shuffled row index so the labels line up positionally with the predictions.
actual_test_class = test["original_class"].reset_index(drop = True)

# confusion_matrix takes (y_true, y_pred): rows are actual classes, columns are predictions.
test_confusion = confusion_matrix(y_true = actual_test_class, y_pred = pred_test_class)

# Rows: predicted class, columns: actual class.
pd.crosstab(pred_test_class, actual_test_class)

original_class,large,small
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
large,0,2
small,5,97
