In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
df = pd.read_csv("4-Algerian_forest_fires_dataset.csv")

In [None]:
df.tail()

In [None]:
df.columns

In [None]:
df.info()

In [None]:
df.isnull().sum()

In [None]:
df[df.isnull().any(axis=1)]

In [None]:
df.drop(122, inplace=True)

In [None]:
df[df.isnull().any(axis=1)]

In [None]:
df.loc[:123, "Region"] = 0
df.loc[123:, "Region"] = 1

In [None]:
df.head()

In [None]:
df.loc[121]

In [None]:
df = df.dropna().reset_index(drop=True)

In [None]:
df.isnull().sum()

In [None]:
df.columns

In [None]:
df.columns = df.columns.str.strip()

In [None]:
df.columns

In [None]:
df[df["day"] == "day"]

In [None]:
df.drop(122, inplace=True)

In [None]:
df[df["day"] == "day"]

In [None]:
df[["day", "month", "year", "Temperature", "RH", "Ws"]] = df[["day", "month", "year", "Temperature", "RH", "Ws"]].astype(int)

In [None]:
df[['Rain', 'FFMC','DMC', 'DC', 'ISI', 'BUI', 'FWI']] = df[['Rain', 'FFMC','DMC', 'DC', 'ISI', 'BUI', 'FWI']].astype(float)

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
df["Classes"] = np.where(df["Classes"].str.contains("not fire"),0,1)

In [None]:
df["Classes"].value_counts()

In [None]:
df["Classes"].value_counts(normalize=True) * 100

In [None]:
df["Classes"] = df["Classes"].astype(bool)

In [None]:
df.info()

In [None]:
df.corr()

In [None]:
df.drop(["day","month","year"], axis=1, inplace=True)

In [None]:
plt.figure(figsize=(12, 8))
sns.heatmap(df.corr(numeric_only=True), annot=True, fmt=".1f", cmap="coolwarm")
plt.title("Correlation Matrix")
plt.show()

In [None]:
#dependent & independent features

In [None]:
X = df.drop("FWI", axis=1)
y = df["FWI"]

In [None]:
X

In [None]:
y

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25, random_state=42)

In [None]:
X_train.shape

In [None]:
X_train.corr()

In [None]:
print(X_train.corr().iloc[0,3])

In [None]:
def corr_for_dropping(df, threshold):
    """Return the columns that are strongly correlated with an earlier column.

    The pairwise correlation matrix of ``df`` is computed with ``df.corr()``.
    A column is flagged when the absolute correlation between it and any
    column positioned before it is strictly greater than ``threshold``.

    Parameters
    ----------
    df : DataFrame whose columns are compared pairwise.
    threshold : absolute correlation above which a column is flagged.

    Returns
    -------
    set
        Labels of the flagged columns (empty when none exceed the threshold).
    """
    corr_matrix = df.corr()
    flagged = set()
    for position, label in enumerate(corr_matrix.columns):
        # Only the entries left of the diagonal: each pair is examined once
        # and the later column of the pair is the one that gets flagged.
        earlier = corr_matrix.iloc[position, :position]
        if (earlier.abs() > threshold).any():
            flagged.add(label)
    return flagged

In [None]:
columns_dropping = corr_for_dropping(X_train, 0.85)

In [None]:
columns_dropping

In [None]:
X_train.drop(columns_dropping, axis=1, inplace=True)
X_test.drop(columns_dropping, axis=1, inplace=True)

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [None]:
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Side-by-side boxplots of the training features before and after scaling.
# The 1x2 grid is created once and each plot is drawn on its own Axes;
# calling plt.subplots(figsize=...) followed by plt.subplot(1, 2, k) would
# stack the two panels on top of an extra full-size Axes.
fig, (ax_raw, ax_scaled) = plt.subplots(1, 2, figsize=(15, 5))
sns.boxplot(data=X_train, ax=ax_raw)
ax_raw.set_title("X_train")
sns.boxplot(data=X_train_scaled, ax=ax_scaled)
ax_scaled.set_title("X_train_scaled")
plt.show()

In [None]:
from sklearn.metrics import mean_absolute_error,mean_squared_error, r2_score

In [None]:
from sklearn.linear_model import LinearRegression
linear = LinearRegression()
linear.fit(X_train_scaled, y_train)
y_pred = linear.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
score = r2_score(y_test, y_pred)
print("mae",mae)
print("mse",mse)
print("r2",score)
plt.title("Linear Regression")
plt.scatter(y_test, y_pred)
plt.show()

In [None]:
from sklearn.linear_model import Ridge
ridge = Ridge()
ridge.fit(X_train_scaled, y_train)
y_pred = ridge.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
score = r2_score(y_test, y_pred)
print("mae",mae)
print("mse",mse)
print("r2",score)
plt.title("Ridge")
plt.scatter(y_test, y_pred)
plt.show()

In [None]:
from sklearn.linear_model import Lasso
lasso = Lasso()
lasso.fit(X_train_scaled, y_train)
y_pred = lasso.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
score = r2_score(y_test, y_pred)
print("mae",mae)
print("mse",mse)
print("r2",score)
plt.title("Lasso")
plt.scatter(y_test, y_pred)
plt.show()

In [None]:
from sklearn.linear_model import ElasticNet
elasticNet = ElasticNet()
elasticNet.fit(X_train_scaled, y_train)
y_pred = elasticNet.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
score = r2_score(y_test, y_pred)
print("mae",mae)
print("mse",mse)
print("r2",score)
plt.title("ElasticNet")
plt.scatter(y_test, y_pred)
plt.show()

In [None]:
# cross validation

In [None]:
from sklearn.linear_model import RidgeCV
ridgecv = RidgeCV(cv=5)
ridgecv.fit(X_train_scaled, y_train)
y_pred = ridgecv.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
score = r2_score(y_test, y_pred)
print("mae",mae)
print("mse",mse)
print("r2",score)
plt.title("RidgeCV")
plt.scatter(y_test, y_pred)
plt.show()

In [None]:
from sklearn.linear_model import LassoCV
lassocv = LassoCV(cv=5)
lassocv.fit(X_train_scaled, y_train)
y_pred = lassocv.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
score = r2_score(y_test, y_pred)
print("mae",mae)
print("mse",mse)
print("r2",score)
plt.title("LassoCV")
plt.scatter(y_test, y_pred)
plt.show()

In [None]:
lassocv.alphas_

In [None]:
lassocv.alpha_

In [None]:
from sklearn.linear_model import ElasticNetCV
elasticnetcv = ElasticNetCV(cv=5)
elasticnetcv.fit(X_train_scaled, y_train)
y_pred = elasticnetcv.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
score = r2_score(y_test, y_pred)
print("mae",mae)
print("mse",mse)
print("r2",score)
plt.title("ElasticNetCV")
plt.scatter(y_test, y_pred)
plt.show()

In [62]:
# Grid of alpha values ElasticNetCV evaluated while fitting.
elasticnetcv.alphas_

array([14.11706004, 13.16561744, 12.27829889, 11.45078264, 10.67903821,
        9.95930678,  9.28808283,  8.66209714,  8.07830078,  7.53385034,
        7.02609405,  6.55255882,  6.11093829,  5.6990815 ,  5.31498248,
        4.95677045,  4.62270071,  4.31114616,  4.02058933,  3.74961507,
        3.49690356,  3.26122397,  3.04142839,  2.83644629,  2.64527931,
        2.46699633,  2.30072904,  2.1456676 ,  2.00105679,  1.86619226,
        1.74041714,  1.62311885,  1.51372607,  1.411706  ,  1.31656174,
        1.22782989,  1.14507826,  1.06790382,  0.99593068,  0.92880828,
        0.86620971,  0.80783008,  0.75338503,  0.7026094 ,  0.65525588,
        0.61109383,  0.56990815,  0.53149825,  0.49567705,  0.46227007,
        0.43111462,  0.40205893,  0.37496151,  0.34969036,  0.3261224 ,
        0.30414284,  0.28364463,  0.26452793,  0.24669963,  0.2300729 ,
        0.21456676,  0.20010568,  0.18661923,  0.17404171,  0.16231189,
        0.15137261,  0.1411706 ,  0.13165617,  0.12278299,  0.11

In [64]:
# Alpha (penalty strength) selected by cross-validation.
elasticnetcv.alpha_

np.float64(0.04311146156383891)