
### Forest Fire Prediction using SVR, Random Forest, and Deep Neural Networks
Data available at: http://archive.ics.uci.edu/ml/datasets/Forest+Fires

X - x-axis spatial coordinate within the Montesinho park map: 1 to 9

Y - y-axis spatial coordinate within the Montesinho park map: 2 to 9

month - month of the year: "jan" to "dec"

day - day of the week: "mon" to "sun"

FFMC - FFMC index from the FWI system: 18.7 to 96.20

DMC - DMC index from the FWI system: 1.1 to 291.3

DC - DC index from the FWI system: 7.9 to 860.6

ISI - ISI index from the FWI system: 0.0 to 56.10

temp - temperature in Celsius degrees: 2.2 to 33.30

RH - relative humidity in %: 15.0 to 100

wind - wind speed in km/h: 0.40 to 9.40

rain - outside rain in mm/m2: 0.0 to 6.4

area - the burned area of the forest (in ha): 0.00 to 1090.84


In [None]:
import warnings
# Suppress every warning for the rest of the session. This keeps the cell outputs
# short, but it also hides warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Use matplotlib's 'default' style sheet for every figure in this notebook.
plt.style.use('default')

In [None]:
# Read the forest-fires table into the working DataFrame.
# NOTE(review): this is an absolute path (it looks like a Colab Drive mount);
# adjust it when running the notebook anywhere else.
csv_path = '/content/drive/My Drive/ForestFires/forestfires.csv'
df = pd.read_csv(csv_path)

In [None]:
# Preview the first rows to check that the file was parsed as expected.
df.head()

In [None]:
# Summary statistics of the numeric columns.
df.describe()

In [None]:
# Log-transform the target. The +1 offset keeps fires with zero burned area
# finite, since log10(0 + 1) = 0.
df['Log-area'] = np.log10(df['area'] + 1)

# Scatter every numeric predictor against the transformed target. The two target
# columns are excluded by name rather than by position, so they are still the
# ones left out if this cell is re-run after later cells have appended columns.
target_cols = ('area', 'Log-area')
feature_cols = [col for col in df.select_dtypes(include='number').columns
                if col not in target_cols]
for col in feature_cols:
  df.plot.scatter(col, 'Log-area', grid=True)

In [None]:
# Spread of the log-transformed burned area for each day of the week.
df.boxplot(by='day', column='Log-area')

In [None]:
# Spread of the log-transformed burned area for each month.
df.boxplot(by='month', column='Log-area')

In [None]:
# Fit a label encoder on the month strings. LabelEncoder assigns integer codes
# following the sorted order of the labels, not calendar order.
enc = LabelEncoder()
enc.fit(df['month'])

In [None]:
# Labels learned by the encoder; the position of a label is its integer code.
enc.classes_

In [None]:
# Integer-encode the month with its own encoder. The shared `enc` object is
# re-fitted on the day column further down, so enc.transform(df['month']) would
# fail on unseen labels if this cell were re-run after that point.
# Codes follow the sorted order of the month labels, not calendar order.
df['month_encoded'] = LabelEncoder().fit_transform(df['month'])
df.head()

In [None]:
# Re-fit the same encoder object on the day-of-week strings. This discards the
# month labels it had learned, so `enc` can only encode days from here on.
enc.fit(df['day'])

In [None]:
# Labels learned by the encoder; the position of a label is its integer code.
enc.classes_

In [None]:
# Integer-encode the day of the week with its own encoder. `enc` is shared with
# the month encoding, so its fitted state depends on which cell ran last; a
# dedicated encoder makes this cell give the same result whenever it is run.
# Codes follow the sorted order of the day labels, not weekday order.
df['day_encoded'] = LabelEncoder().fit_transform(df['day'])
df.head()

In [None]:
# Fraction of the rows held out as the test set when the data is split.
test_size = 0.4

In [None]:
# Features: every column except the raw target, its log transform, and the
# original string columns (their integer-encoded counterparts are kept).
X_data = df.drop(['area', 'Log-area', 'month', 'day'], axis=1)
# Target: the log10(area + 1) column.
y_data = df['Log-area']
# A fixed seed makes the split, and every metric computed from it, reproducible
# across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=test_size, random_state=42)

In [None]:
def rec (m,n,tol):
  """Return the percentage of predictions whose absolute error is within `tol`.

  Both inputs hold values on a log10 scale. Each value is mapped back with
  10**value before the error is measured, so `tol` is expressed in the units of
  the back-transformed quantity.

  Parameters
  ----------
  m, n : array-like
      Predicted and actual values on the log10 scale. Any array-like is
      accepted (list, ndarray, pandas Series); values are paired by position.
  tol : float
      Largest absolute error that still counts as a correct prediction.

  Returns
  -------
  float
      Percentage (0 to 100) of pairs with |10**m - 10**n| <= tol. Returns 0.0
      when the inputs are empty.

  Raises
  ------
  ValueError
      If `m` and `n` do not contain the same number of elements.
  """
  # asarray + ravel gives plain positional 1-D arrays whatever the input type,
  # so a pandas Series with a shuffled index is paired by position, not by label.
  m = np.asarray(m, dtype=float).ravel()
  n = np.asarray(n, dtype=float).ravel()
  if m.size != n.size:
    raise ValueError(
        "m and n must have the same number of elements, got %d and %d"
        % (m.size, n.size))
  l = m.size
  if l == 0:
    # No predictions to score; avoid dividing by zero.
    return 0.0
  within_tol = np.abs(np.power(10.0, m) - np.power(10.0, n)) <= tol
  return 100*(np.count_nonzero(within_tol)/l)

In [None]:
# Upper bound on the absolute-error tolerances used for the REC curve, i.e. the
# extent of that curve's x-axis. In this problem the tolerance is the absolute
# error in the predicted outcome, the area burned.
tol_max = 20

In [None]:
# Standardising transformer (zero mean, unit variance per column) used when the
# SVR model is fitted.
scaler = StandardScaler()

In [None]:
# Hyper-parameter candidates for the SVR grid search: every combination of the
# values below is evaluated.
param_grid = {
    'C': [0.01, 0.1, 1, 10],
    'epsilon': [10, 1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}

In [None]:
# Put the feature scaling inside a pipeline so that
#   * each cross-validation fold is standardised from its own training part, and
#   * the fitted search applies the same scaling automatically at predict time,
#     so it can be given raw (unscaled) feature frames.
# The target is left in its log10 units: StandardScaler rejects a 1-D target,
# and standardising it would put the predictions on a different scale from
# y_test when the error is computed.
svr_pipeline = Pipeline([('scaler', scaler), ('svr', SVR())])
# Pipeline parameters are addressed as '<step name>__<parameter>'.
svr_param_grid = {'svr__' + name: values for name, values in param_grid.items()}
grid_SVR = GridSearchCV(svr_pipeline, svr_param_grid, refit=True, verbose=0, cv=5)
grid_SVR.fit(X_train, y_train)

In [None]:
# Hyper-parameter combination with the best cross-validated score.
grid_SVR.best_params_

In [None]:
# Predict on the held-out rows and report the root-mean-squared error in the
# units of y_test.
# NOTE(review): X_test is passed as-is, which is only valid if grid_SVR applies
# the same feature scaling internally that was used when it was fitted -- verify.
a = grid_SVR.predict(X_test)
svr_rmse = np.sqrt(np.mean(np.square(y_test - a)))
print("RMSE for Support Vector Regression:", svr_rmse)

In [None]:
# y_test holds log10(area + 1), so the actual burned area is 10**y_test - 1.
# The error itself needs no correction: the +1 offsets cancel in the difference.
actual_area = 10**(y_test) - 1
prediction_error = 10**(a) - 10**(y_test)
plt.xlabel("Actual area burned")
plt.ylabel("Error")
plt.grid(True)
plt.scatter(actual_area, prediction_error)

In [None]:

# Histogram of the signed prediction error after mapping both the predictions
# and the targets back from the log10 scale.
pred_pow10 = 10 ** a.reshape(a.size,)
true_pow10 = 10 ** y_test
plt.title("Histogram of prediction errors\n", fontsize=18)
plt.xlabel("Prediction error ($ha$)", fontsize=14)
plt.grid(True)
plt.hist(pred_pow10 - true_pow10, bins=50)

In [None]:
# REC curve: share of test predictions within each integer tolerance
# 0, 1, ..., tol_max - 1 of the actual value.
tolerances = range(tol_max)
rec_SVR = [rec(a, y_test, tolerance) for tolerance in tolerances]

plt.figure(figsize=(5, 5))
plt.title("REC curve for the Support Vector Regressor\n", fontsize=15)
plt.xlabel("Absolute error (tolerance) in prediction ($ha$)")
plt.ylabel("Percentage of correct prediction")
plt.xticks([5 * step for step in range(tol_max + 1)])
plt.ylim(-10, 100)
plt.yticks([20 * step for step in range(6)])
plt.grid(True)
plt.plot(tolerances, rec_SVR)