In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

: 

In [2]:
# Load the cleaned Algerian forest fires dataset; the relative path means the CSV
# must sit in the notebook's working directory.
df = pd.read_csv('Algerian_forest_fires_cleaned_dataset.csv')

In [None]:
# Preview the first rows of the frame as loaded from the CSV.
df.head()

In [4]:
# Drop the calendar columns (day, month, year) so they are not carried into the feature set.
# Reassigning instead of using inplace=True, together with errors='ignore', keeps this
# cell safe to re-run after the columns have already been removed.
df = df.drop(columns=['day', 'month', 'year'], errors='ignore')

In [None]:
# Confirm the calendar columns are no longer present.
df.head()

In [6]:
# Encode the Classes column as binary: 0 when the text contains 'not fire', 1 otherwise.
# A substring match is used rather than equality (presumably the raw labels carry
# stray whitespace — TODO confirm against the CSV).
# NOTE(review): after this runs the column holds integers, so re-running the cell
# fails on the `.str` accessor; reload `df` before executing it again.
df['Classes'] = np.where(df['Classes'].str.contains('not fire'),0,1)

In [None]:
# Check the encoded Classes values at the top of the frame.
df.head()

In [None]:
# Check the encoded Classes values at the bottom of the frame as well.
df.tail()

In [None]:
# Summarize column dtypes and non-null counts before building the feature matrix.
df.info()

In [10]:
# Independent and dependent features

# Feature matrix: every remaining column except FWI, which is used as the target.
X = df.drop('FWI',axis=1)

In [None]:
# Preview the feature matrix (FWI should be absent).
X.head()

In [None]:
# Regression target: the FWI column.
y = df['FWI']
y.head()

In [13]:
# Train test split
# Hold out 25% of the rows for testing; random_state pins the shuffle so the
# split is reproducible across runs.

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25,random_state=42)

In [None]:
# Row/column counts of the feature splits.
X_train.shape , X_test.shape

In [None]:
# Row counts of the target splits; they should match the feature splits above.
y_train.shape , y_test.shape

In [None]:
# Feature selection: pairwise correlation between features, computed on the
# training split only.
X_train.corr()

In [None]:
# Annotated heatmap of the training-split correlation matrix; annot=True writes
# each coefficient into its cell.
plt.figure(figsize=(15,7))
sns.heatmap(X_train.corr(),annot=True)

In [18]:
# Function to check multicollinearity between independent features

def correlation(dataset, threshold):
    """Return the names of columns that are highly correlated with an earlier column.

    The pairwise correlation matrix is obtained from ``dataset.corr()``. A column
    is reported when the absolute value of its correlation with at least one
    column positioned before it is strictly greater than ``threshold``. Because
    only earlier columns are compared, the first column of a correlated pair is
    never reported on account of that pair.

    Parameters
    ----------
    dataset : object exposing ``.corr()`` (e.g. a pandas DataFrame)
        Data whose columns are checked against each other.
    threshold : float
        Cut-off applied to the absolute correlation coefficient.

    Returns
    -------
    set
        Names of the flagged columns; empty when nothing exceeds the threshold.
    """
    pairwise = dataset.corr()
    names = pairwise.columns
    # Scan only the strict lower triangle so each pair is visited once and the
    # later column of the pair is the one that gets reported.
    return {
        names[row]
        for row in range(len(names))
        if any(abs(pairwise.iloc[row, col]) > threshold for col in range(row))
    }

In [19]:
# Features whose absolute correlation with an earlier column exceeds 0.85,
# judged on the training split only.
corr_feature = correlation(X_train,0.85)

In [20]:
# Remove the highly correlated features from both splits so they keep identical columns.
# Reassignment (rather than inplace=True on the frames returned by train_test_split)
# plus errors="ignore" lets this cell be re-run without a KeyError once the
# columns are already gone.
X_train = X_train.drop(columns=list(corr_feature), errors="ignore")
X_test = X_test.drop(columns=list(corr_feature), errors="ignore")

In [None]:
# Confirm both splits now have the same, reduced number of columns.
X_train.shape , X_test.shape

In [22]:
# Feature Scaling and Standardization
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics influence the transformation.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Inspect the standardized training features.
X_train_scaled


In [None]:
# Side-by-side boxplots of the training features before and after standardization.
# The scaled values are wrapped in a DataFrame so the right-hand plot keeps the
# feature names on its x-axis instead of positional indices.
fig, (ax_before, ax_after) = plt.subplots(1, 2, figsize=(15, 5))

sns.boxplot(data=X_train, ax=ax_before)
ax_before.set_title("Training Data before Scaling", weight="bold", size=10)

sns.boxplot(
    data=pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index),
    ax=ax_after,
)
ax_after.set_title("Training Data after Scaling", weight="bold", size=10)

plt.show()


## Linear Regression Model

In [25]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

import warnings
# NOTE(review): this silences every warning for the rest of the session, which can
# also hide library warnings raised while fitting the models below — consider a
# narrower filter.
warnings.filterwarnings("ignore")

In [None]:
# Ordinary least squares fitted on the standardized training features,
# then used to predict the held-out test split.
reg = LinearRegression().fit(X_train_scaled, y_train)
y_pred = reg.predict(X_test_scaled)

# Hold-out metrics: mean absolute error and R2 score.
mae, score = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)

print('Mean Absolute error : ' , mae)
print('R2 Score : ', score)

In [None]:
# Predicted vs. actual FWI on the test split; axis labels and a title are added
# so the figure can be read on its own.
plt.scatter(y_test, y_pred)
plt.xlabel('Actual FWI')
plt.ylabel('Predicted FWI')
plt.title('Actual vs. predicted FWI (test split)')
plt.show()

## Lasso Regression

In [28]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score


In [None]:
# Lasso regression with scikit-learn's default settings, fitted on the
# standardized training features.
lasso = Lasso()
lasso.fit(X_train_scaled, y_train)
y_pred = lasso.predict(X_test_scaled)

# mae
mae = mean_absolute_error(y_test, y_pred)

# r2 score
score = r2_score(y_test, y_pred)

print('Mean Absolute error : ' , mae)
print('R2 Score : ', score)

# Predicted vs. actual plot, labelled so the figure can be read on its own.
plt.scatter(y_test, y_pred)
plt.xlabel('Actual FWI')
plt.ylabel('Predicted FWI')
plt.title('Lasso: actual vs. predicted FWI (test split)')
plt.show()

## Ridge Regression

In [None]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

# Ridge regression with scikit-learn's default settings, fitted on the
# standardized training features. This is the model pickled at the end of the notebook.
ridge = Ridge()
ridge.fit(X_train_scaled, y_train)
y_pred = ridge.predict(X_test_scaled)

# mae
mae = mean_absolute_error(y_test, y_pred)

# r2 score
score = r2_score(y_test, y_pred)

print('Mean Absolute error : ' , mae)
print('R2 Score : ', score)

# Predicted vs. actual plot, labelled so the figure can be read on its own.
plt.scatter(y_test, y_pred)
plt.xlabel('Actual FWI')
plt.ylabel('Predicted FWI')
plt.title('Ridge: actual vs. predicted FWI (test split)')
plt.show()

## Elastic Net ML

In [None]:
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

# Elastic Net regression with scikit-learn's default settings, fitted on the
# standardized training features.
elastic = ElasticNet()
elastic.fit(X_train_scaled, y_train)
y_pred = elastic.predict(X_test_scaled)

# mae
mae = mean_absolute_error(y_test, y_pred)

# r2 score
score = r2_score(y_test, y_pred)

print('Mean Absolute error : ' , mae)
print('R2 Score : ', score)

# Predicted vs. actual plot, labelled so the figure can be read on its own.
plt.scatter(y_test, y_pred)
plt.xlabel('Actual FWI')
plt.ylabel('Predicted FWI')
plt.title('Elastic Net: actual vs. predicted FWI (test split)')
plt.show()

## Cross Validation Lasso

In [None]:
# Lasso whose regularization strength is selected by 5-fold cross-validation
# on the standardized training split.
from sklearn.linear_model import LassoCV
lassocv = LassoCV(cv=5)
lassocv.fit(X_train_scaled,y_train)

In [35]:
# Predict the test split with the cross-validated Lasso (overwrites the previous y_pred).
y_pred=lassocv.predict(X_test_scaled)

In [None]:
# Hold-out metrics for the cross-validated Lasso predictions.
score = r2_score(y_test, y_pred)           # r2 score
mae = mean_absolute_error(y_test, y_pred)  # mae

print('Mean Absolute error : ' , mae)
print('R2 Score : ', score)


In [None]:
# Pickling the machine learning model and the preprocessing StandardScaler:
# inspect the fitted scaler before it is saved.

scaler

In [None]:
# Inspect the fitted Ridge model before it is saved.
ridge


In [40]:
import pickle

# Persist the fitted scaler and the Ridge model. Each file is opened in a `with`
# block so the handle is flushed and closed even if pickling raises; the original
# bare open(...) calls left both handles open.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

with open('ridge.pkl', 'wb') as model_file:
    pickle.dump(ridge, model_file)