In [1]:
import pandas as pd
import seaborn as sns

from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

In [2]:
# Load the data set from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="New_data_sample.csv")

In [3]:
# Remove the 'Unnamed: 0' column (presumably an index column saved by an
# earlier export -- confirm against the CSV).
# Rebinding with errors='ignore' keeps this cell safe to re-run: an in-place
# drop raises KeyError on the second execution because the column is gone.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [4]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,6,5,4,4,81.5,9.1,55.2,2.7,5.8,54,5.8,0.0,4.61
1,4,4,3,1,87.6,52.2,103.8,5.0,11.0,46,5.8,0.0,0.0
2,3,4,8,1,91.5,145.4,608.2,10.7,17.1,43,5.4,0.0,0.0
3,6,5,4,4,81.5,9.1,55.2,2.7,5.8,54,5.8,0.0,10.93
4,3,3,9,6,92.2,102.3,751.5,8.4,24.2,27,3.1,0.0,6.58


In [5]:
# Split the frame into the target (burned 'area') and the predictors
# (every remaining column).
Y = df['area']
X = df.drop(columns='area')

In [6]:
# Model screening: fit every candidate on the full data set and score it on
# that same data (explained variance and mean absolute error).
# NOTE(review): these are in-sample (training) scores, not held-out scores, so
# they favour models that can memorise the data. Treat the ranking as a rough
# screen only.
num_instances = len(X)

# Stochastic estimators get random_state=0 (the seed used for the split and
# the final models in this notebook) so the printed scores are reproducible.
models = [
    ('GradientBoostRegressor', GradientBoostingRegressor(random_state=0)),
    ('RandomForestRegressor', RandomForestRegressor(random_state=0)),
    ('ExtraTreesRegressor', ExtraTreesRegressor(random_state=0)),
    ('Decision Tree', DecisionTreeRegressor(random_state=0)),
    # KNeighborsRegressor has no random_state parameter.
    ('KNN Regressor', KNeighborsRegressor()),
]

# Evaluations
results = []  # mean absolute error per model, in the same order as `names`
names = []
scoring = []

for name, model in models:
    # Fit the model
    model.fit(X, Y)

    predictions = model.predict(X)

    # Evaluate the model; both metrics take (y_true, y_pred) in that order.
    score = explained_variance_score(Y, predictions)
    mae = mean_absolute_error(Y, predictions)
    results.append(mae)
    names.append(name)

    # Printed as "<name>: <explained variance> (<mean absolute error>)".
    msg = "%s: %f (%f)" % (name, score, mae)
    print(msg)

GradientBoostRegressor: 0.959011 (8.552991)
RandomForestRegressor: 0.985782 (2.552747)
ExtraTreesRegressor: 0.999933 (0.098450)
Decision Tree: 0.999933 (0.098450)
KNN Regressor: 0.597541 (11.348178)


In [7]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the df.head() preview shows rows with identical feature values
# (index 0 and 3). If the file contains many such repeats, a random split can
# place near-copies on both sides and inflate the test score -- worth checking.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

In [8]:
# Extra Trees Regressor and Decision Tree Regressor scored best in the model
# comparison, so both are trained and checked on the train/test split.
# 1) Extra Trees Regressor
# `criterion` is left at its default (squared error): the legacy alias 'mse'
# was removed in scikit-learn 1.2 and raises an error there.
# NOTE(review): the name `re` would shadow the standard-library `re` module if
# that module were ever imported in this notebook.
re = ExtraTreesRegressor(random_state=0)
re.fit(x_train, y_train)

ExtraTreesRegressor(random_state=0)

In [9]:
# Score the Extra Trees model on the held-out and the training rows.
# For scikit-learn regressors .score() returns R^2; the printout labels it
# "Accuracy" and shows it multiplied by 100.
test_accuracy = re.score(x_test, y_test)
train_accuracy = re.score(x_train, y_train)
print("Using Extra Trees Regressor: ")
print(f"Test Accuracy: {test_accuracy * 100:.2f} %")
print(f"Train Accuracy: {train_accuracy * 100:.2f} %")


Using Extra Trees Regressor: 
Test Accuracy: 93.29 %
Train Accuracy: 100.00 %


In [10]:
# 2) Decision Tree Regressor
# `criterion` is left at its default (squared error): the legacy alias 'mse'
# was removed in scikit-learn 1.2 and raises an error there.
DTR = DecisionTreeRegressor(random_state=0)
DTR.fit(x_train, y_train)

DecisionTreeRegressor(random_state=0)

In [12]:
# Score the Decision Tree model on the held-out and the training rows.
# For scikit-learn regressors .score() returns R^2; the printout labels it
# "Accuracy" and shows it multiplied by 100.
test_accuracy1 = DTR.score(x_test, y_test)
train_accuracy1 = DTR.score(x_train, y_train)
print("Using Decision Tree Regressor: ")
print(f"Test Accuracy: {test_accuracy1 * 100:.2f} %")
print(f"Train Accuracy: {train_accuracy1 * 100:.2f} %")

Using Decision Tree Regressor: 
Test Accuracy: 99.10 %
Train Accuracy: 100.00 %


In [13]:
# Input format: list the feature columns of X and their dtypes. A row passed
# to the prediction helper is expected to follow this column order.
X.dtypes

X          int64
Y          int64
month      int64
day        int64
FFMC     float64
DMC      float64
DC       float64
ISI      float64
temp     float64
RH         int64
wind     float64
rain     float64
dtype: object

In [14]:
# Categorising Forest Fire Damage Function
def area_cat(area):
    """Map a burned-area value to a damage label.

    Args:
        area: burned area as a number (the notebook prints it in hectares).

    Returns:
        "No damage" when the area equals 0.0; otherwise the label of the first
        upper bound that is not exceeded -- "Low Damage" (<= 1),
        "Moderate " (<= 25, trailing space included), "High" (<= 100) --
        and "Very High" when no bound matches.
    """
    if area == 0.0:
        return "No damage"

    # Upper bounds are inclusive and checked in ascending order.
    thresholds = (
        (1, "Low Damage"),
        (25, "Moderate "),
        (100, "High"),
    )
    for upper_bound, label in thresholds:
        if area <= upper_bound:
            return label
    return "Very High"

In [15]:
#Prediction Function
def showPrediction(test_data):
    """Predict the burned area for one sample and print it with its damage label.

    Args:
        test_data: a single feature row (sequence of numbers) handed to
            ``DTR.predict``. The rows used in this notebook hold 12 values,
            presumably in the column order of ``X`` -- confirm before reuse.

    Returns:
        None. Two lines are printed: the predicted area and its category.
    """
    # predict() takes a batch and returns an array, so wrap the row in a list
    # and unwrap the single result. Using the scalar keeps the printed value
    # free of array brackets and gives area_cat() a plain number.
    burned_area = DTR.predict([test_data])[0]
    print("Area Burned: {} ha".format(burned_area))
    print("Predicted: ", area_cat(burned_area))

In [16]:
# Predict the burned area for every held-out row and print each value
# (two decimals) next to its damage category.
pred = DTR.predict(x_test)
for burned_area in pred:
    print(f"{burned_area:.2f} {area_cat(burned_area)}")

0.00 No damage
26.00 High
6.54 Moderate 
6.30 Moderate 
0.00 No damage
36.85 High
0.00 No damage
1.61 Moderate 
1.43 Moderate 
12.64 Moderate 
2.47 Moderate 
0.00 No damage
0.00 No damage
42.87 High
0.00 No damage
0.00 No damage
6.44 Moderate 
0.00 No damage
0.00 No damage
0.00 No damage
746.28 Very High
37.71 High
0.00 No damage
3.07 Moderate 
6.43 Moderate 
0.90 Low Damage
1.36 Moderate 
0.00 No damage
37.71 High
0.00 No damage
8.85 Moderate 
54.29 High
3.07 Moderate 
0.00 No damage
0.00 No damage
0.00 No damage
0.00 No damage
0.00 No damage
0.00 No damage
95.18 High
0.00 No damage
0.00 No damage
6.43 Moderate 
0.00 No damage
86.45 High
0.00 No damage
0.00 No damage
0.00 No damage
3.64 Moderate 
0.00 No damage
1.09 Moderate 
1.75 Moderate 
8.98 Moderate 
0.00 No damage
8.31 Moderate 
0.00 No damage
0.84 Low Damage
0.00 No damage
1.19 Moderate 
0.00 No damage
6.36 Moderate 
2.47 Moderate 
10.08 Moderate 
95.18 High
0.00 No damage
0.00 No damage
0.00 No damage
2.00 Moderate 
0.00 No da

In [17]:
# Hand-written sample rows, 12 values each, run through the prediction helper
# one after the other.
test_data1 = [8, 5, 8, 3, 93.1, 157.3, 666.7, 13.5, 24, 36, 3.1, 0]
test_data2 = [4, 5, 3, 5, 91.7, 33.3, 77.5, 9, 17.2, 26, 4.5, 0]

for sample_row in (test_data1, test_data2):
    showPrediction(sample_row)

Area Burned: [0.24] ha
Predicted:  Low Damage
Area Burned: [0.] ha
Predicted:  No damage
