Importing Required Libraries

In [0]:
from pandas import read_csv
import numpy as np
from numpy import mean, absolute
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error as mse, mean_absolute_error as mae

Loading Dataset

In [0]:
# loading the dataset
data = read_csv("./forestfires.csv")

Encoding Months and Days

In [0]:
encode_months = {'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 
                 'may': 5, 'jun': 6, 'jul': 7, 'aug': 8, 
                 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12}
encode_days = {'mon': 1, 'tue': 2, 'wed': 3, 'thu': 4, 
               'fri': 5, 'sat': 6, 'sun': 7 }
data = data.replace({'month': encode_months, 'day': encode_days});

Split data into input (X) and output (Y) variables

In [0]:
X = data.iloc[:,0:12]
Y = data.iloc[:,12].values

Standardizing the Data

In [0]:
def standardData(z, mean, std): 
    z_standard = (z - mean)/ std
    return z_standard

X = standardData(X,X.mean(),X.std())

Feature Extraction using PCA

In [0]:
#Retaining 95% of data post PCA.
def enable_pca(X):
  pca = PCA(.95)
  pca.fit(X)
  X_pca = pca.transform(X)
  print("PCA run succesfully. %.2f Features extracted." % X_pca.shape[1])
  return X_pca

Splitting data into Training and Testing sets

In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3,shuffle=False )

Function for calculating Negative Log Likelihood

In [0]:
def NLL(Y,Y_pred):
  n=Y.shape[0]
  variance = np.var(Y_pred, ddof=1)
  first_term= (n/2)*np.log(2*np.pi*variance)
  second_term=0
  for i in range(n):
    second_term+= (Y[i]-Y_pred[i])**2 
  second_term=second_term/(2*variance)
  NLL = first_term+second_term
  return NLL

Function for implementing optimised AdaBoost Regressor

In [0]:
def AdaBoost(nest,lo,lr):
  AdB = AdaBoostRegressor(random_state=0, n_estimators=nest, loss=lo, learning_rate=lr)
  AdB.fit(X_train, y_train)
  y_pred = AdB.predict(X_test)
  print("AdaBoost Regression")
  return y_pred

Function for running PCA and AdaBoost on the Dataset and printing the result metrics

In [20]:
X= enable_pca(X)
y_pred = AdaBoost(100,'square',1)

print("Root Mean Square Error is:" , np.sqrt(mse(y_test,y_pred)))
print("Mean Absolute Error is:" , mae(y_test,y_pred))
print("Mean Absolute Deviation is:" ,mean(absolute(y_pred - mean(y_pred))))
print("Negative Log Likelihood is: ",NLL(y_test,y_pred))

PCA run succesfully. 10.00 Features extracted.
AdaBoost Regression
Root Mean Square Error is: 65.16242022177141
Mean Absolute Error is: 21.908538235243984
Mean Absolute Deviation is: 9.6334939909227
Negative Log Likelihood is:  2272.719994552467
