# Forest Fire Forecast

### Import libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

## Data sourcing

In [2]:
filename = 'data/forestfires.csv'
col_names = [ 'X', 'Y', 'month', 'day', 'FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'rain', 'area' ]

df = pd.read_csv(filename, names = col_names)

print(pd.isnull(df))

         X      Y  month    day   FFMC    DMC     DC    ISI   temp     RH  \
0    False  False  False  False  False  False  False  False  False  False   
1    False  False  False  False  False  False  False  False  False  False   
2    False  False  False  False  False  False  False  False  False  False   
3    False  False  False  False  False  False  False  False  False  False   
4    False  False  False  False  False  False  False  False  False  False   
..     ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   
512  False  False  False  False  False  False  False  False  False  False   
513  False  False  False  False  False  False  False  False  False  False   
514  False  False  False  False  False  False  False  False  False  False   
515  False  False  False  False  False  False  False  False  False  False   
516  False  False  False  False  False  False  False  False  False  False   

      wind   rain   area  
0    False  False  False  
1    False  False  Fa

### Review data

In [3]:
print("Data shape: {}\n".format(df.shape))

print("Data types:\n{}\n".format(df.dtypes))
df.head()

Data shape: (517, 13)

Data types:
X          int64
Y          int64
month     object
day       object
FFMC     float64
DMC      float64
DC       float64
ISI      float64
temp     float64
RH         int64
wind     float64
rain     float64
area     float64
dtype: object



Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


#### Modify data

In [4]:
df.month.replace(('jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'),
                 (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12),
                 inplace=True)

df.day.replace(('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'),
               (1, 2, 3, 4, 5, 6, 7),
               inplace = True)

df.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,3,5,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,10,2,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,10,6,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,3,5,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,3,7,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [5]:
df.describe()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
count,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0
mean,4.669246,4.299807,7.475822,4.259188,90.644681,110.87234,547.940039,9.021663,18.889168,44.288201,4.017602,0.021663,12.847292
std,2.313778,1.2299,2.27599,2.072929,5.520111,64.046482,248.066192,4.559477,5.806625,16.317469,1.791653,0.295959,63.655818
min,1.0,2.0,1.0,1.0,18.7,1.1,7.9,0.0,2.2,15.0,0.4,0.0,0.0
25%,3.0,4.0,7.0,2.0,90.2,68.6,437.7,6.5,15.5,33.0,2.7,0.0,0.0
50%,4.0,4.0,8.0,5.0,91.6,108.3,664.2,8.4,19.3,42.0,4.0,0.0,0.52
75%,7.0,5.0,9.0,6.0,92.9,142.4,713.9,10.8,22.8,53.0,4.9,0.0,6.57
max,9.0,9.0,12.0,7.0,96.2,291.3,860.6,56.1,33.3,100.0,9.4,6.4,1090.84


In [6]:
df.corr(method = 'pearson')

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
X,1.0,0.539548,-0.065003,-0.024922,-0.021039,-0.048384,-0.085916,0.00621,-0.051258,0.085223,0.018798,0.065387,0.063385
Y,0.539548,1.0,-0.066292,-0.005453,-0.046308,0.007782,-0.101178,-0.024488,-0.024103,0.062221,-0.020341,0.033234,0.044873
month,-0.065003,-0.066292,1.0,-0.050837,0.291477,0.466645,0.868698,0.186597,0.368842,-0.09528,-0.086368,0.013438,0.056496
day,-0.024922,-0.005453,-0.050837,1.0,-0.041068,0.06287,0.000105,0.032909,0.05219,0.092151,0.032478,-0.04834,0.023226
FFMC,-0.021039,-0.046308,0.291477,-0.041068,1.0,0.382619,0.330512,0.531805,0.431532,-0.300995,-0.028485,0.056702,0.040122
DMC,-0.048384,0.007782,0.466645,0.06287,0.382619,1.0,0.682192,0.305128,0.469594,0.073795,-0.105342,0.07479,0.072994
DC,-0.085916,-0.101178,0.868698,0.000105,0.330512,0.682192,1.0,0.229154,0.496208,-0.039192,-0.203466,0.035861,0.049383
ISI,0.00621,-0.024488,0.186597,0.032909,0.531805,0.305128,0.229154,1.0,0.394287,-0.132517,0.106826,0.067668,0.008258
temp,-0.051258,-0.024103,0.368842,0.05219,0.431532,0.469594,0.496208,0.394287,1.0,-0.52739,-0.227116,0.069491,0.097844
RH,0.085223,0.062221,-0.09528,0.092151,-0.300995,0.073795,-0.039192,-0.132517,-0.52739,1.0,0.06941,0.099751,-0.075519


## Algorithms

### LinearRegression

In [49]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, KFold, cross_val_score

array = df.values
X = array[:,0:12]
Y = array[:,12]

num_folds = 10
seed = 42

scores = []
scores.append('max_error')
scores.append('neg_mean_absolute_error')
scores.append('r2')
scores.append('neg_mean_squared_error')

lr_model = LinearRegression()

kfold = KFold(n_splits = num_folds, random_state = seed, shuffle=True)
results = []

for score in scores:
    print(score)
    results.append(cross_val_score(lr_model, X, Y, cv = kfold, scoring = score))
    
msg = "LinearRegression:\nmax error: %f\nmean absolute error: %f\nr2: %f\nmean squared error: %f" % (result[0].mean(),
                    -result[1].mean(), result[2].mean(), -result[3].mean())
print(msg)

max_error
neg_mean_absolute_error
r2
neg_mean_squared_error
LinearRegression:
max error: -22311.094237
mean absolute error: 1223.554730
r2: -304.912113
mean squared error: 2382.100954


### ElasticNet

In [61]:
from sklearn.linear_model import ElasticNet

en_model = ElasticNet()

results = []
for score in scores:
    results.append(cross_val_score(en_model, X, Y, cv = kfold, scoring = score))
    
msg = "ElasticNet:\nmax error: %f\nmean absolute error: %f\nr2: %f\nmean squared error: %f" % (results[0].mean(),
                    -results[1].mean(), results[2].mean(), -results[3].mean())
print(msg)

ElasticNet:
max error: -285.594634
mean absolute error: 19.375744
r2: -0.386356
mean squared error: 4033.146283


### Lasso Regression

In [57]:
from sklearn.linear_model import Lasso

lasso_model = Lasso()

results = []
for score in scores:
    results.append(cross_val_score(lasso_model, X, Y, cv = kfold, scoring = score))
    
msg = "Lasso Regression:\nmax error: %f\nmean absolute error: %f\nr2: %f\nmean squared error: %f" % (result[0].mean(),
                    -result[1].mean(), result[2].mean(), -result[3].mean())
print(msg)

Lasso Regression:
max error: -22311.094237
mean absolute error: 1223.554730
r2: -304.912113
mean squared error: 2382.100954


### Ridge Regression

In [62]:
from sklearn.linear_model import Ridge

ridge_model = Ridge()

results = []
for score in scores:
    results.append(cross_val_score(ridge_model, X, Y, cv = kfold, scoring = score))
    
msg = "Ridge Regression:\nmax error: %f\nmean absolute error: %f\nr2: %f\nmean squared error: %f" % (result[0].mean(),
                    -result[1].mean(), result[2].mean(), -result[3].mean())
print(msg)

Ridge Regression:
max error: -22311.094237
mean absolute error: 1223.554730
r2: -304.912113
mean squared error: 2382.100954


### KNeighbors Regression

In [63]:
from sklearn.neighbors import KNeighborsRegressor

knr_model = KNeighborsRegressor()

results = []
for score in scores:
    results.append(cross_val_score(knr_model, X, Y, cv = kfold, scoring = score))
    
msg = "KNeighbors Regression:\nmax error: %f\nmean absolute error: %f\nr2: %f\nmean squared error: %f" % (result[0].mean(),
                    -result[1].mean(), result[2].mean(), -result[3].mean())
print(msg)

KNeighbors Regression:
max error: -22311.094237
mean absolute error: 1223.554730
r2: -304.912113
mean squared error: 2382.100954


### SVR

In [64]:
from sklearn.svm import SVR

svr_model = SVR()

results = []
for score in scores:
    results.append(cross_val_score(svr_model, X, Y, cv = kfold, scoring = score))

msg = "SVR:\nmax error: %f\nmean absolute error: %f\nr2: %f\nmean squared error: %f" % (result[0].mean(),
                    -result[1].mean(), result[2].mean(), -result[3].mean())
print(msg)

SVR:
max error: -22311.094237
mean absolute error: 1223.554730
r2: -304.912113
mean squared error: 2382.100954


### Decision Tree Regression

In [65]:
from sklearn.tree import DecisionTreeRegressor

dtr_model = DecisionTreeRegressor()

results = [] 
for score in scores:
    results.append(cross_val_score(lr_model, X, Y, cv = kfold, scoring = score))

msg = "Decision Tree Regression:\nmax error: %f\nmean absolute error: %f\nr2: %f\nmean squared error: %f" % (result[0].mean(),
                    -result[1].mean(), result[2].mean(), -result[3].mean())

print(msg)

Decision Tree Regression:
max error: -22311.094237
mean absolute error: 1223.554730
r2: -304.912113
mean squared error: 2382.100954
