<a href="https://colab.research.google.com/github/S-Devisri01/Python-colab/blob/main/Python_gen_ai_day_6_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ============================
# Import required libraries
# ============================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.metrics import r2_score

import statsmodels.api as sm


# ============================
# Read the salary table and split it into predictors / response
# ============================
df = pd.read_csv("/content/employee_salary.csv")

# Predictor matrix: the three columns every model below is fitted on.
X = df.loc[:, ["Experience", "EducationLevel", "Age"]]

# Response vector the models try to explain.
y = df.loc[:, "Salary"]


# ============================
# 5-fold cross-validated R² for a Ridge baseline
# ============================
# Baseline estimator with regularisation strength alpha = 1.0.
ridge = Ridge(alpha=1.0)

# cross_val_score returns one R² value per fold (cv=5 -> five values).
scores = cross_val_score(estimator=ridge, X=X, y=y, scoring="r2", cv=5)

print("Cross-Validation R² scores:", scores)
print("Average CV Score:", scores.mean())


# ============================
# Tune alpha with an exhaustive grid search
# ============================
# Candidate regularisation strengths, spaced by factors of ten.
params = {"alpha": [0.01, 0.1, 1, 10, 100]}

# Each candidate is scored by 5-fold cross-validated R².
grid = GridSearchCV(estimator=Ridge(), param_grid=params, scoring="r2", cv=5)
grid.fit(X, y)

print("Best Alpha:", grid.best_params_)
print("Best CV Score:", grid.best_score_)


# ============================
# Adjusted R² (manual)
# ============================
# GridSearchCV refits the winning configuration on the full data set by
# default (refit=True), so reuse that fitted model instead of constructing
# and training an identical Ridge a second time.
best_ridge = grid.best_estimator_

# In-sample predictions: the R² below measures fit on the very data the model
# was trained on, not out-of-sample performance.
y_pred = best_ridge.predict(X)

r2 = r2_score(y, y_pred)

# n = number of observations (rows), k = number of predictors (columns).
n, k = X.shape

# The adjustment divides by the residual degrees of freedom; with too few rows
# that is zero (division error) or negative (meaningless result), so fail with
# an explicit message instead.
residual_dof = n - k - 1
if residual_dof <= 0:
    raise ValueError(
        f"Adjusted R² needs more than k + 1 = {k + 1} observations; got n = {n}."
    )

adj_r2 = 1 - (1 - r2) * (n - 1) / residual_dof

print("R²:", r2)
print("Adjusted R²:", adj_r2)


# ============================
# Information criteria (AIC / BIC) from a statsmodels OLS fit
# ============================
# sm.OLS does not add an intercept on its own, so give it an explicit
# constant column alongside the predictors.
X_const = sm.add_constant(X)

# Note: these criteria describe the unpenalised OLS fit, not the Ridge model.
ols_model = sm.OLS(endog=y, exog=X_const).fit()

print("AIC:", ols_model.aic)
print("BIC:", ols_model.bic)


Cross-Validation R² scores: [0.89006932 0.89772997 0.90819285 0.87589803 0.88310694]
Average CV Score: 0.8909994215752061
Best Alpha: {'alpha': 1}
Best CV Score: 0.8909994215752061
R²: 0.8925838993553217
Adjusted R²: 0.8922603568835004
AIC: 19848.681846282172
BIC: 19868.3128673981
