In [10]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
import itertools

# -------------------------------------------
# Step 1: Generate dataset with correlated features
# -------------------------------------------
np.random.seed(0)
n = 400

X1 = np.random.randn(n)
X2 = X1 + np.random.normal(0, 0.1, n)
X3 = X1 + X2 + np.random.normal(0, 0.1, n)
X4 = X1*2 + np.random.normal(0, 0.2, n)
X5 = X2*3 + np.random.normal(0, 0.2, n)
X6 = X3*2 + np.random.normal(0, 0.1, n)
X7 = X1 + X2 + X3 + X4 + np.random.normal(0, 0.5, n)

X = np.vstack([X1,X2,X3,X4,X5,X6,X7]).T
true_w = np.array([5,3,2,4,1,-2,3])
y = X.dot(true_w) + np.random.normal(0, 3, n)

# Add bias column
X_b = np.c_[np.ones((n,1)), X]

# -------------------------------------------
# Step 2: Ridge Regression (Gradient Descent)
# -------------------------------------------
def ridge_gradient_descent(X, y, lr, lam, epochs=500):
    m, n = X.shape
    w = np.zeros(n)

    for _ in range(epochs):
        y_pred = X.dot(w)
        grad = (1/m) * X.T.dot(y_pred - y) + (lam/m)*np.r_[0, w[1:]]
        w -= lr * grad
        # Add a check for NaN in weights after update to prevent further propagation
        if np.isnan(w).any():
            break
    return w

# Hyperparameter search
# Removed learning rates 0.1 and 1 to prevent RuntimeWarnings due to numerical instability
learning_rates = [0.0001,0.001,0.01]
lambdas = [1e-15,1e-10,1e-5,1e-3,0,1,10,20]

best_params = None
best_score = -np.inf
best_cost = np.inf

for lr, lam in itertools.product(learning_rates, lambdas):
    w = ridge_gradient_descent(X_b, y, lr, lam)
    y_pred = X_b.dot(w)

    # Check for NaNs in y_pred before calculating r2_score and cost
    if np.isnan(y_pred).any():
        # If NaNs are present, this combination of hyperparameters is bad.
        # Assign a very low R2 score and a very high cost to discard it.
        r2 = -np.inf
        cost = np.inf
    else:
        cost = np.mean((y - y_pred)**2) + lam * np.sum(w[1:]**2)
        r2 = r2_score(y, y_pred)

    if r2 > best_score:
        best_score = r2
        best_cost = cost
        best_params = (lr, lam, w)

print("Best Learning Rate:", best_params[0])
print("Best Lambda:", best_params[1])
print("Best R²:", best_score)
print("Minimum Cost:", best_cost)

Best Learning Rate: 0.01
Best Lambda: 1e-15
Best R²: 0.9913571122179273
Minimum Cost: 9.157879211469126


In [11]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_squared_error
df = pd.read_csv("/content/Hitters.csv")
# Remove missing values
df = df.dropna()

# Convert categorical columns
df = pd.get_dummies(df, drop_first=True)

# Features/Target
X = df.drop("Salary", axis=1)
y = df["Salary"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# -------------------------------------------
# Fit models
# -------------------------------------------
lr = LinearRegression()
ridge = Ridge(alpha=0.5748)
lasso = Lasso(alpha=0.5748)

lr.fit(X_train, y_train)
ridge.fit(X_train, y_train)
lasso.fit(X_train, y_train)

# Predictions
lr_pred = lr.predict(X_test)
ridge_pred = ridge.predict(X_test)
lasso_pred = lasso.predict(X_test)

# Evaluation
print("Linear Reg R2:", r2_score(y_test, lr_pred))
print("Ridge Reg R2:", r2_score(y_test, ridge_pred))
print("Lasso Reg R2:", r2_score(y_test, lasso_pred))

print("\nMSE Scores:")
print("Linear:", mean_squared_error(y_test, lr_pred))
print("Ridge:", mean_squared_error(y_test, ridge_pred))
print("Lasso:", mean_squared_error(y_test, lasso_pred))

print("\nBest Model:",
      "Ridge (handles multicollinearity best)"
      if r2_score(y_test, ridge_pred) > r2_score(y_test, lr_pred) else "Linear")


Linear Reg R2: 0.290745185579814
Ridge Reg R2: 0.30003596988293446
Lasso Reg R2: 0.29928590166965496

MSE Scores:
Linear: 128284.34549672354
Ridge: 126603.90264424692
Lasso: 126739.56899132291

Best Model: Ridge (handles multicollinearity best)


  model = cd_fast.enet_coordinate_descent(


In [12]:
from sklearn.datasets import fetch_openml
from sklearn.linear_model import RidgeCV, LassoCV

# Load Boston dataset
boston = fetch_openml("boston", as_frame=True)
X = boston.data
y = boston.target

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# RidgeCV
ridge_cv = RidgeCV(alphas=[0.1,1,10])
ridge_cv.fit(X_train, y_train)

# LassoCV
lasso_cv = LassoCV(cv=5)
lasso_cv.fit(X_train, y_train)

print("Best Ridge Alpha:", ridge_cv.alpha_)
print("Ridge R2:", ridge_cv.score(X_test, y_test))

print("Best Lasso Alpha:", lasso_cv.alpha_)
print("Lasso R2:", lasso_cv.score(X_test, y_test))

Best Ridge Alpha: 10.0
Ridge R2: 0.6659677905050292
Best Lasso Alpha: 0.006863892263379668
Lasso R2: 0.6683883969336302


- version 1, status: active
  url: https://www.openml.org/search?type=data&id=531
- version 2, status: active
  url: https://www.openml.org/search?type=data&id=853



In [13]:
from sklearn.datasets import load_iris
import numpy as np

iris = load_iris()
X = iris.data
y = iris.target
m, n = X.shape

# Add bias
X = np.c_[np.ones(m), X]

# Sigmoid
def sigmoid(z):
    return 1/(1+np.exp(-z))

# Logistic Regression for one class
def train_binary(X, y, lr=0.1, epochs=2000):
    m, n = X.shape
    w = np.zeros(n)
    for _ in range(epochs):
        z = X.dot(w)
        h = sigmoid(z)
        gradient = (1/m) * X.T.dot(h - y)
        w -= lr * gradient
    return w

# One-vs-rest
weights = []
classes = np.unique(y)

for c in classes:
    y_binary = (y == c).astype(int)
    w = train_binary(X, y_binary)
    weights.append(w)

weights = np.array(weights)

# Prediction
def predict(X):
    scores = X.dot(weights.T)
    return np.argmax(scores, axis=1)

# Accuracy
y_pred = predict(X)
print("Training Accuracy:", np.mean(y_pred == y))

Training Accuracy: 0.96
