In [4]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.model_selection import KFold, cross_val_score, cross_val_predict
from sklearn.metrics import mean_squared_error

# Forest-fires CSV hosted by the UCI Machine Learning Repository
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/forest-fires/forestfires.csv'

# Read the CSV straight from the URL into a DataFrame and preview the top rows
df = pd.read_csv(url)
print(df.head())

# Target is the 'area' column; the features are every remaining column.
# Non-numeric feature columns are one-hot encoded, and drop_first=True
# omits the first level of each so the dummy columns are not redundant.
y = df['area']
X = pd.get_dummies(df.drop(columns=['area']), drop_first=True)

# Estimators under comparison, keyed by the label shown in the printed
# reports. Each one is constructed with its default hyperparameters.
models = {
    label: estimator_cls()
    for label, estimator_cls in (
        ('Linear Regression', LinearRegression),
        ('Lasso', Lasso),
        ('Ridge', Ridge),
        ('ElasticNet', ElasticNet),
    )
}

# A single 5-fold splitter reused by every evaluation; shuffling with a
# fixed seed keeps the row partition reproducible from run to run.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold evaluation: score each model on every fold of `kf`, then
# report the mean and spread of the fold-level RMSE values.
print("5-Fold Cross Validation using cross_val_score()")
for name, model in models.items():
    # The 'neg_mean_squared_error' scorer returns MSE with its sign
    # flipped, so negate it before taking the square root.
    mse_scores = cross_val_score(
        model, X, y, scoring='neg_mean_squared_error', cv=kf
    )
    rmse_scores = np.sqrt(-mse_scores)
    mean_rmse, std_rmse = rmse_scores.mean(), rmse_scores.std()
    print(f'{name} - Mean RMSE: {mean_rmse:.4f}, Std RMSE: {std_rmse:.4f}')

# Pooled evaluation: gather the out-of-fold prediction for every row and
# compute one RMSE over all of them. Because the squared errors are
# pooled before the square root, this figure need not equal an average
# of per-fold RMSE values.
print("\n5-Fold Cross Validation using cross_val_predict()")
for name, model in models.items():
    y_pred = cross_val_predict(model, X, y, cv=kf)
    pooled_mse = mean_squared_error(y, y_pred)
    rmse = np.sqrt(pooled_mse)
    print(f'{name} - RMSE: {rmse:.4f}')

# In-sample fit: train each model on the full dataset and score it on
# those same rows. This is a training error, not a held-out estimate.
# Note that this fits the estimator instances stored in `models` in place.
print("\nEvaluation of Models")
for name, model in models.items():
    # fit() returns the estimator itself, so the calls can be chained.
    y_pred = model.fit(X, y).predict(X)
    training_mse = mean_squared_error(y, y_pred)
    rmse = np.sqrt(training_mse)
    print(f'{name} - RMSE on Training Data: {rmse:.4f}')

   X  Y month  day  FFMC   DMC     DC  ISI  temp  RH  wind  rain  area
0  7  5   mar  fri  86.2  26.2   94.3  5.1   8.2  51   6.7   0.0   0.0
1  7  4   oct  tue  90.6  35.4  669.1  6.7  18.0  33   0.9   0.0   0.0
2  7  4   oct  sat  90.6  43.7  686.9  6.7  14.6  33   1.3   0.0   0.0
3  8  6   mar  fri  91.7  33.3   77.5  9.0   8.3  97   4.0   0.2   0.0
4  8  6   mar  sun  89.3  51.3  102.2  9.6  11.4  99   1.8   0.0   0.0
5-Fold Cross Validation using cross_val_score()
Linear Regression - Mean RMSE: 55.6451, Std RMSE: 31.8700
Lasso - Mean RMSE: 54.1803, Std RMSE: 33.0457
Ridge - Mean RMSE: 55.3870, Std RMSE: 32.0907
ElasticNet - Mean RMSE: 54.0546, Std RMSE: 33.1067

5-Fold Cross Validation using cross_val_predict()
Linear Regression - RMSE: 64.1980
Lasso - RMSE: 63.5400
Ridge - RMSE: 64.0857
ElasticNet - RMSE: 63.4646

Evaluation of Models
Linear Regression - RMSE on Training Data: 62.1214
Lasso - RMSE on Training Data: 62.6429
Ridge - RMSE on Training Data: 62.1738
ElasticNet - RMSE 