In [72]:
# Hinted imports
from sklearn.linear_model import Ridge, Lasso
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression


data = pd.read_csv("/content/drive/MyDrive/Dataset/housing (1).csv")  # replace with your file path

# Prepare features and target
X = data.drop(columns=["median_house_value", "ocean_proximity"])  # drop target + categorical
y = data["median_house_value"]

#  Fill missing values with median
X = X.fillna(X.median())

#  Scale target (divide by 100000 â†’ units in 100k USD)
y = y / 100000

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline Linear Regression
lr = LinearRegression()
lr.fit(X_train, y_train)

y_train_pred = lr.predict(X_train)
y_test_pred = lr.predict(X_test)

print("Baseline Linear Regression")
print("Train RMSE:", np.sqrt(mean_squared_error(y_train, y_train_pred)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_test, y_test_pred)))
print("Coefficients:", lr.coef_)

# Ridge Regression (L2) with GridSearchCV
ridge = Ridge()
alpha_grid = {'alpha':[0.01, 0.1, 1, 10, 100]}

ridge_cv = GridSearchCV(ridge, alpha_grid, cv=5, scoring='neg_mean_squared_error')
ridge_cv.fit(X_train, y_train)
best_ridge = ridge_cv.best_estimator_

y_train_pred_ridge = best_ridge.predict(X_train)
y_test_pred_ridge = best_ridge.predict(X_test)

print("\nRidge Regression")
print("Best alpha:", ridge_cv.best_params_)
print("Train RMSE:", np.sqrt(mean_squared_error(y_train, y_train_pred_ridge)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_test, y_test_pred_ridge)))
print("Coefficients:", best_ridge.coef_)

# Lasso Regression (L1) with GridSearchCV
lasso = Lasso(max_iter=10000)
alpha_grid = {'alpha':[0.001, 0.01, 0.1, 1, 10]}

lasso_cv = GridSearchCV(lasso, alpha_grid, cv=5, scoring='neg_mean_squared_error')
lasso_cv.fit(X_train, y_train)
best_lasso = lasso_cv.best_estimator_

y_train_pred_lasso = best_lasso.predict(X_train)
y_test_pred_lasso = best_lasso.predict(X_test)

print("\nLasso Regression")
print("Best alpha:", lasso_cv.best_params_)
print("Train RMSE:", np.sqrt(mean_squared_error(y_train, y_train_pred_lasso)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_test, y_test_pred_lasso)))
print("Coefficients:", best_lasso.coef_)


Baseline Linear Regression
Train RMSE: 0.6936234135238081
Test RMSE: 0.7113317349286027
Coefficients: [-4.26323917e-01 -4.24500719e-01  1.18280965e-02 -8.18797708e-05
  1.16260128e-03 -3.84922131e-04  4.63425720e-04  4.05384044e-01]

Ridge Regression
Best alpha: {'alpha': 10}
Train RMSE: 0.69362375117932
Test RMSE: 0.7113122477395921
Coefficients: [-4.25356271e-01 -4.23596665e-01  1.18435199e-02 -8.19693583e-05
  1.16124492e-03 -3.84961512e-04  4.65690255e-04  4.05435655e-01]

Lasso Regression
Best alpha: {'alpha': 0.001}
Train RMSE: 0.6936284261311465
Test RMSE: 0.7112533592034614
Coefficients: [-4.22613024e-01 -4.21033735e-01  1.18800296e-02 -8.21939245e-05
  1.15708842e-03 -3.85081132e-04  4.72235506e-04  4.05552476e-01]


In [73]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [74]:
# Baseline Logistic Regression
log_reg = LogisticRegression(max_iter=5000)
log_reg.fit(X_train, y_train)

# Predictions
y_train_pred = log_reg.predict(X_train)
y_test_pred = log_reg.predict(X_test)

# Accuracy
train_acc = accuracy_score(y_train, y_train_pred)
test_acc = accuracy_score(y_test, y_test_pred)

print("Baseline Logistic Regression")
print("Training Accuracy:", train_acc)
print("Test Accuracy:", test_acc)
print("Coefficients:", log_reg.coef_)


Baseline Logistic Regression
Training Accuracy: 0.9582417582417583
Test Accuracy: 0.956140350877193
Coefficients: [[ 1.0274368   0.22145051 -0.36213488  0.0254667  -0.15623532 -0.23771256
  -0.53255786 -0.28369224 -0.22668189 -0.03649446 -0.09710208  1.3705667
  -0.18140942 -0.08719575 -0.02245523  0.04736092 -0.04294784 -0.03240188
  -0.03473732  0.01160522  0.11165329 -0.50887722 -0.01555395 -0.016857
  -0.30773117 -0.77270908 -1.42859535 -0.51092923 -0.74689363 -0.10094404]]


In [75]:
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2']
}

log_cv = GridSearchCV(
    LogisticRegression(solver='liblinear', max_iter=5000),
    param_grid,
    cv=5,
    scoring='accuracy'
)

log_cv.fit(X_train, y_train)

best_log = log_cv.best_estimator_

print("Best Hyperparameters:", log_cv.best_params_)


Best Hyperparameters: {'C': 100, 'penalty': 'l1'}


In [76]:
# Predictions with optimized model
y_train_pred_best = best_log.predict(X_train)
y_test_pred_best = best_log.predict(X_test)

# Accuracy
train_acc_best = accuracy_score(y_train, y_train_pred_best)
test_acc_best = accuracy_score(y_test, y_test_pred_best)

print("Optimized Logistic Regression")
print("Training Accuracy:", train_acc_best)
print("Test Accuracy:", test_acc_best)
print("Coefficients:", best_log.coef_)


Optimized Logistic Regression
Training Accuracy: 0.989010989010989
Test Accuracy: 0.9824561403508771
Coefficients: [[ 7.52031530e-01 -1.08238276e-01  9.36367563e-02 -2.04921115e-03
   0.00000000e+00  4.73424048e+01 -1.18978774e+01 -1.36227109e+02
   1.97808920e+01  0.00000000e+00  0.00000000e+00  1.72907576e+00
  -1.35522151e-05 -1.97602756e-01  0.00000000e+00  0.00000000e+00
   5.00605810e+01  0.00000000e+00  1.89416013e+01  0.00000000e+00
   2.19277080e-01 -4.38585767e-01  5.79568762e-02 -2.11693836e-02
  -2.16016782e+01  7.96840993e+00 -1.46104470e+01 -2.52660823e+01
  -2.53914971e+01  0.00000000e+00]]


In [77]:
# L1 Logistic Regression
log_l1 = LogisticRegression(
    penalty='l1',
    C=best_log.C,
    solver='liblinear',
    max_iter=5000
)

# L2 Logistic Regression
log_l2 = LogisticRegression(
    penalty='l2',
    C=best_log.C,
    solver='liblinear',
    max_iter=5000
)

log_l1.fit(X_train, y_train)
log_l2.fit(X_train, y_train)

# Accuracy comparison
print("L1 Train Accuracy:", accuracy_score(y_train, log_l1.predict(X_train)))
print("L1 Test Accuracy:", accuracy_score(y_test, log_l1.predict(X_test)))

print("L2 Train Accuracy:", accuracy_score(y_train, log_l2.predict(X_train)))
print("L2 Test Accuracy:", accuracy_score(y_test, log_l2.predict(X_test)))

print("L1 Coefficients:", log_l1.coef_)
print("L2 Coefficients:", log_l2.coef_)


L1 Train Accuracy: 0.989010989010989
L1 Test Accuracy: 0.9824561403508771
L2 Train Accuracy: 0.9692307692307692
L2 Test Accuracy: 0.956140350877193
L1 Coefficients: [[ 7.18038257e-01 -1.08259512e-01  1.03657723e-01 -2.36056754e-03
   0.00000000e+00  4.70310228e+01 -1.17910449e+01 -1.36179669e+02
   1.97346729e+01  0.00000000e+00  0.00000000e+00  1.73294839e+00
   0.00000000e+00 -1.97788429e-01  0.00000000e+00  0.00000000e+00
   5.00927989e+01  0.00000000e+00  1.85896116e+01  0.00000000e+00
   2.10441813e-01 -4.39198248e-01  5.51390413e-02 -2.09352586e-02
  -2.17620979e+01  8.03603714e+00 -1.46571367e+01 -2.52985816e+01
  -2.53138997e+01  0.00000000e+00]]
L2 Coefficients: [[ 5.40272741  0.26573732 -0.52651203 -0.02095479 -2.29922151 -0.2169419
  -3.56980218 -5.0110607  -2.26418385  0.36762747 -0.58036525  3.84169255
  -0.63875637 -0.10714305 -0.40591246  3.57237002  4.36577514 -0.26636925
   0.39354765  0.62630698 -0.20718582 -0.68999884  0.17661962 -0.01813087
  -4.67716843 -0.0140846 

In [78]:
results = pd.DataFrame({
    "Model": ["Baseline", "Optimized", "L1", "L2"],
    "Train Accuracy": [
        train_acc,
        train_acc_best,
        accuracy_score(y_train, log_l1.predict(X_train)),
        accuracy_score(y_train, log_l2.predict(X_train))
    ],
    "Test Accuracy": [
        test_acc,
        test_acc_best,
        accuracy_score(y_test, log_l1.predict(X_test)),
        accuracy_score(y_test, log_l2.predict(X_test))
    ]
})

results


Unnamed: 0,Model,Train Accuracy,Test Accuracy
0,Baseline,0.958242,0.95614
1,Optimized,0.989011,0.982456
2,L1,0.989011,0.982456
3,L2,0.969231,0.95614
