In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
df = pd.read_csv('C:/Users/rishi/Desktop/ML/Datasets/Algerian_forest_fires_cleaned_dataset.csv')
df.head()

Unnamed: 0,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region
0,1,6,2012,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,not fire,0
1,2,6,2012,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,not fire,0
2,3,6,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,not fire,0
3,4,6,2012,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,not fire,0
4,5,6,2012,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,not fire,0


In [3]:
df.drop(['day', 'month', 'year'], axis = 1, inplace = True)
df.Classes = np.where(df.Classes.str.contains('not fire'), 0, 1)

In [4]:
X = df.drop('FWI', axis = 1)
y = df.FWI

In [5]:
from sklearn.model_selection import train_test_split as tts

X_train, X_test, y_train, y_test = tts(X, y, test_size = 0.33, random_state = 10)

In [6]:
def correlation(dataset, threshold) :
    col_corr = set()
    corr_matrix = dataset.corr()

    for i in range(len(corr_matrix.columns)) :
        for j in range(i) :
            if abs(corr_matrix.iloc[i, j])  > threshold :
                colname = corr_matrix.columns[i]
                col_corr.add(colname)

    return col_corr


corr_features = correlation(X_train, 0.85)

In [7]:
X_train.drop(corr_features, axis = 1, inplace = True)
X_test.drop(corr_features, axis = 1, inplace = True)

In [8]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [9]:
from sklearn.linear_model import Ridge

regressor = Ridge()
regressor.fit(X_train, y_train)
y_test_prediction = regressor.predict(X_test)

print("Slope or Coefficient:", regressor.coef_)
print("Intercept:", regressor.intercept_)

Slope or Coefficient: [-0.03951681 -0.21855048  0.07061907  0.06772119 -0.79895222  3.3909805
  4.46527817  0.610526   -0.40643988]
Intercept: 6.845061728395063


In [10]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

mse = mean_squared_error(y_test, y_test_prediction) # Actual data and Predicted Data
mae = mean_absolute_error(y_test, y_test_prediction)
rmse = np.sqrt(mse)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("Root Mean Squared Error:", rmse)

Mean Squared Error: 0.8313788230209306
Mean Absolute Error: 0.6781225131218179
Root Mean Squared Error: 0.9117997713428813


In [11]:
from sklearn.metrics import r2_score

score = r2_score(y_test, y_test_prediction)
print("Accuracy of r squared:", score)

n = (len(y_test) - 1) # No of observations
k = X_test.shape[1] # No of predictor variables
adjusted_score = 1 - ((1 - score) * (n - 1)  / (n - k - 1))
print("Accuracy of Adjusted r squared:", adjusted_score)

Accuracy of r squared: 0.9863698710809673
Accuracy of Adjusted r squared: 0.9846174259342345


## Ridge Cross Validation

In [15]:
 from sklearn.linear_model import RidgeCV

ridgeCV = RidgeCV(cv = 5)
ridgeCV.fit(X_train, y_train)
y_test_prediction = ridgeCV.predict(X_test)

mse = mean_squared_error(y_test, y_test_prediction)
print("Mean Squared Error:", mse)

score = r2_score(y_test, y_test_prediction)
print("Accuracy of r squared:", score)

Mean Squared Error: 1.312624031941085
Accuracy of r squared: 0.9784800450983737


In [16]:
ridgeCV.get_params()

{'alpha_per_target': False,
 'alphas': (0.1, 1.0, 10.0),
 'cv': 5,
 'fit_intercept': True,
 'gcv_mode': None,
 'scoring': None,
 'store_cv_values': False}

## ElasticNet Regression

In [17]:
from sklearn.linear_model import ElasticNet

regressor = ElasticNet()
regressor.fit(X_train, y_train)
y_test_prediction = regressor.predict(X_test)

print("Slope or Coefficient:", regressor.coef_)
print("Intercept:", regressor.intercept_)

Slope or Coefficient: [ 6.43058081e-02 -4.13642746e-01  1.07315087e-03 -0.00000000e+00
  3.09855931e-01  2.20962646e+00  2.29398840e+00  8.73381589e-01
 -0.00000000e+00]
Intercept: 6.845061728395062


In [18]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

mse = mean_squared_error(y_test, y_test_prediction) # Actual data and Predicted Data
mae = mean_absolute_error(y_test, y_test_prediction)
rmse = np.sqrt(mse)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("Root Mean Squared Error:", rmse)

Mean Squared Error: 7.204027132336864
Mean Absolute Error: 2.02562054995965
Root Mean Squared Error: 2.6840318799032294


In [19]:
from sklearn.metrics import r2_score

score = r2_score(y_test, y_test_prediction)
print("Accuracy of r squared:", score)

n = (len(y_test) - 1) # No of observations
k = X_test.shape[1] # No of predictor variables
adjusted_score = 1 - ((1 - score) * (n - 1)  / (n - k - 1))
print("Accuracy of Adjusted r squared:", adjusted_score)

Accuracy of r squared: 0.8818928076695921
Accuracy of Adjusted r squared: 0.8667075972271111


## ElasticNet Cross Validation

In [22]:
 from sklearn.linear_model import ElasticNetCV

elasticNetCV = ElasticNetCV(cv = 5)
elasticNetCV.fit(X_train, y_train)
y_test_prediction = elasticNetCV.predict(X_test)

mse = mean_squared_error(y_test, y_test_prediction)
print("Mean Squared Error:", mse)

score = r2_score(y_test, y_test_prediction)
print("Accuracy of r squared:", score)

Mean Squared Error: 0.8854391577457683
Accuracy of r squared: 0.9854835731487825
