In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

In [2]:
# === 1. Load and Inspect the Data ===
# The CSV location is machine-specific, so it can be overridden by setting the
# CARSHARING_CSV environment variable. When the variable is unset, the original
# hard-coded location is used, so existing setups keep working unchanged.
DEFAULT_CSV_PATH = r"S:\Course work\3rd year\Artificial Intelligence & Machine Learning\Workshop3\CarSharing.csv"
file_path = os.environ.get("CARSHARING_CSV", DEFAULT_CSV_PATH)
data = pd.read_csv(file_path)

In [3]:
# Parse the raw timestamp once, derive the hour-of-day and day-of-week features
# from it (pandas numbers days Monday=0 ... Sunday=6), then discard the raw column.
parsed_ts = pd.to_datetime(data['timestamp'])
data = (
    data
    .assign(hour=parsed_ts.dt.hour, dayofweek=parsed_ts.dt.dayofweek)
    .drop(columns='timestamp')
)

In [4]:
# One-hot encode categorical variables. drop_first=True omits the first level of
# each encoded variable, so the remaining indicator columns are read relative to it.
# NOTE(review): with the default dummy_na=False a missing categorical value gets
# all-zero indicators, i.e. it is indistinguishable from the dropped first level —
# confirm whether any categorical column contains missing values.
data = pd.get_dummies(data=data, drop_first=True)

In [5]:
# === 2. Prepare Features and Target ===
# Target is the 'demand' column; the feature matrix is every remaining column.
y = data['demand']
X = data.drop(columns='demand')


In [6]:
# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

In [7]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# === 3. Define Evaluation Function ===
def evaluate_model(model, X_test, y_test, name="Model"):
    """Score a fitted model on the given data, print a short report, return the scores.

    Parameters
    ----------
    model : object exposing ``predict(X_test)``.
    X_test : features passed to ``model.predict``.
    y_test : true target values the predictions are compared against.
    name : label used in the printed report header.

    Returns
    -------
    tuple
        ``(mse, mae, r2)`` in that order.
    """
    predictions = model.predict(X_test)
    scores = (
        mean_squared_error(y_test, predictions),
        mean_absolute_error(y_test, predictions),
        r2_score(y_test, predictions),
    )
    labels = ("Mean Squared Error (MSE)", "Mean Absolute Error (MAE)", "R² Score")

    print(f"\n{name} Evaluation:")
    for label, value in zip(labels, scores):
        print(f"{label}: {value:.4f}")
    return scores

In [9]:
# === 4. Train and Evaluate Linear Regression ===
# Baseline: fit on the training split (fit() returns the estimator itself),
# then report its metrics on the test split.
lr_model = LinearRegression().fit(X_train, y_train)
lr_results = evaluate_model(lr_model, X_test, y_test, name="Linear Regression")


Linear Regression Evaluation:
Mean Squared Error (MSE): 1.1031
Mean Absolute Error (MAE): 0.8220
R² Score: 0.4973


In [11]:
# === 5. Train and Evaluate Ridge Regression with Different Alphas ===
# Sweep the regularization strength; each entry appended to ridge_results is
# (alpha, mse, mae, r2) for one fitted model.
alphas = [0.01, 0.1, 1, 10, 100]
ridge_results = []

for alpha in alphas:
    ridge = Ridge(alpha=alpha).fit(X_train, y_train)
    results = evaluate_model(ridge, X_test, y_test, name=f"Ridge (alpha={alpha})")
    ridge_results.append((alpha,) + tuple(results))


Ridge (alpha=0.01) Evaluation:
Mean Squared Error (MSE): 1.1031
Mean Absolute Error (MAE): 0.8220
R² Score: 0.4973

Ridge (alpha=0.1) Evaluation:
Mean Squared Error (MSE): 1.1031
Mean Absolute Error (MAE): 0.8220
R² Score: 0.4973

Ridge (alpha=1) Evaluation:
Mean Squared Error (MSE): 1.1032
Mean Absolute Error (MAE): 0.8220
R² Score: 0.4973

Ridge (alpha=10) Evaluation:
Mean Squared Error (MSE): 1.1032
Mean Absolute Error (MAE): 0.8220
R² Score: 0.4973

Ridge (alpha=100) Evaluation:
Mean Squared Error (MSE): 1.1037
Mean Absolute Error (MAE): 0.8225
R² Score: 0.4971


In [None]:
### 📄 Report Cell: Interpretation

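The Linear Regression model achieved an R² score of approximately 0.4973 on the held-out test set, meaning it explains around 49.7% of the variance in the test-set demand values. Ridge Regression models, tested with alpha values from 0.01 to 100, produced nearly identical performance metrics (R² between 0.4971 and 0.4973). This implies that regularization had minimal impact, suggesting the model is not significantly overfitting and the features may not be strongly collinear. One caveat: the features were not standardized before fitting, so the Ridge penalty acts unevenly on features with different scales, and alphas in this range may simply be too weak to change the fit.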

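Future improvements could involve feature engineering (e.g., deriving month or time-of-year from the timestamp, which is currently reduced to hour and day of week), non-linear models, or incorporating external data (e.g., local events) to better capture demand patterns.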
