In [None]:
# ======================================
# STEP 1: Mount Google Drive
# ======================================
# Mounts Google Drive at /content/drive so that files stored there (such as
# the CSV read from /content/drive/MyDrive/... in STEP 3) are reachable
# through ordinary file paths.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime; outside Colab, drop this step and point data_path at a local file.
from google.colab import drive
drive.mount('/content/drive')

# ======================================
# STEP 2: Import Required Libraries
# ======================================
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet

from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    r2_score,
    confusion_matrix,
    roc_curve,
    auc
)

# ======================================
# STEP 3: Load External Dataset
# ======================================
# Path of the CSV file to model; edit this to point at your own dataset.
data_path = '/content/drive/MyDrive/dataset.csv'  # change path
df = pd.read_csv(data_path)

# Preview dataset
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell (and never in a plain script), so print it explicitly to
# make the preview actually appear.
print(df.head())

# ======================================
# STEP 4: Split Features and Target
# ======================================
# Positional split: every column except the last is a feature, and the
# last column is the regression target.
X = df.iloc[:, :-1]   # features
y = df.iloc[:, -1]    # target

# ======================================
# STEP 5: Train-Test Split
# ======================================
# Keep 20% of the rows aside for evaluation; the fixed random_state makes
# the partition repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ======================================
# STEP 6: Feature Scaling (MANDATORY for Elastic Net)
# ======================================
# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so the test set never influences the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ======================================
# STEP 7: Train Elastic Net Regressor
# ======================================
# alpha is the regularization strength; l1_ratio balances the L1 and L2
# penalties. fit() returns the estimator itself, so construction and
# training are chained into a single statement.
model = ElasticNet(alpha=0.1, l1_ratio=0.5, random_state=42).fit(X_train_scaled, y_train)

# ======================================
# STEP 8: Make Predictions
# ======================================
# Predict on the scaled held-out features.
y_pred = model.predict(X_test_scaled)

# ======================================
# STEP 9: Regression Evaluation Metrics
# ======================================
# Compare the held-out targets with the model's predictions.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is derived from the MSE computed above
r2 = r2_score(y_test, y_pred)

print("MSE :", mse)
print("MAE :", mae)
print("RMSE:", rmse)
# The label previously contained a mis-decoded superscript two ("RÂ²");
# it is restored to the intended "R²" and padded to align with "RMSE:".
print("R²  :", r2)

# ======================================
# STEP 10: Seaborn Visualizations
# ======================================

# Actual vs Predicted: scatter of held-out targets against predictions.
# seaborn returns the Axes it drew on, so the labels and title are set
# directly on that object.
sns.scatterplot(x=y_test, y=y_pred).set(
    xlabel="Actual Values",
    ylabel="Predicted Values",
    title="Elastic Net: Actual vs Predicted",
)
plt.show()

# Residual Plot: histogram with a KDE overlay of (actual - predicted).
residuals = y_test - y_pred
sns.histplot(residuals, kde=True).set_title("Residual Distribution")
plt.show()

# ======================================
# STEP 11: OPTIONAL Classification Metrics (Educational)
# ======================================
# The regression problem is recast as a binary one: a value counts as
# class 1 when it is at or above the median of the held-out targets.
threshold = y_test.median()

y_test_binary = (y_test >= threshold).astype(int)
y_pred_binary = (y_pred >= threshold).astype(int)

# Confusion Matrix of the thresholded targets vs thresholded predictions,
# drawn as an annotated heatmap; labels are set on the returned Axes.
cm = confusion_matrix(y_test_binary, y_pred_binary)
sns.heatmap(cm, annot=True, fmt='d', cmap='Greens').set(
    title="Confusion Matrix (Threshold-Based)",
    xlabel="Predicted",
    ylabel="Actual",
)
plt.show()

# ROC Curve & AUC: the raw (continuous) predictions serve as the score
# that is swept against the binarized targets.
fpr, tpr, _ = roc_curve(y_test_binary, y_pred)
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}")
plt.plot([0, 1], [0, 1], linestyle="--")  # diagonal reference line
plt.gca().set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curve (Thresholded Regression Output)",
)
plt.legend()
plt.show()