ASSIGNMENT 4

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import math

# ----------------------------
# Create dataset
# ----------------------------
# Synthetic regression problem: 7 features that are all noisy copies of one
# uniform base column, so the columns are strongly correlated with each other.
np.random.seed(42)  # seed the global NumPy RNG so every draw below is repeatable
n_samples = 200
X_base = np.random.rand(n_samples, 1)
# Each of the 7 columns is X_base plus independent Gaussian noise (std 0.01).
X = np.hstack([X_base + np.random.normal(0, 0.01, (n_samples, 1)) for _ in range(7)])
# The target uses only the first three columns, plus Gaussian noise (std 0.05).
y = 3*X[:, 0] + 2*X[:, 1] - 4*X[:, 2] + np.random.normal(0, 0.05, n_samples)

# train-test split & scaling
# 80/20 split; the scaler is fitted on the training rows only and then applied
# to the test rows, so test statistics do not influence the scaling.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# ----------------------------
# Robust Ridge GD with NaN/inf checks
# ----------------------------
def is_finite_array(a):
    """Return True when every element of ``a`` is finite (no NaN, +inf or -inf)."""
    finite_mask = np.isfinite(a)
    return finite_mask.all()

def ridge_regression_gd_safe(X, y, lr=0.01, lam=1e-3, n_iter=1000, grad_clip=None, verbose=False):
    """Fit ridge regression by batch gradient descent, aborting on divergence.

    Minimises ``mean((y - (X @ w + b))**2) + lam * sum(w**2)``; the intercept
    ``b`` is not penalised.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Design matrix.
    y : array-like, shape (n_samples,) or (n_samples, 1)
        Targets. A column vector is flattened so that ``y - y_pred`` cannot
        silently broadcast to an (n_samples, n_samples) matrix.
    lr : float
        Gradient-descent step size.
    lam : float
        L2 penalty strength applied to ``w``.
    n_iter : int
        Number of gradient steps.
    grad_clip : float or None
        If given, every gradient component is clipped to
        ``[-grad_clip, grad_clip]`` before the update.
    verbose : bool
        Print a message when the run is aborted.

    Returns
    -------
    (w, b, cost, valid)
        On success: the weight vector, the intercept, the final objective
        value and ``True``. If ``w``, ``b`` or the objective becomes
        non-finite: ``(None, None, float('inf'), False)``.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or ``y`` does not hold one value per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
    n_samples, n_features = X.shape
    if y.shape[0] != n_samples:
        raise ValueError(f"X has {n_samples} rows but y has {y.shape[0]} values")

    def objective(weights, bias):
        # Penalised mean squared error; this is what gets reported as the cost.
        return np.mean((y - (X.dot(weights) + bias))**2) + lam * np.sum(weights**2)

    w = np.zeros(n_features, dtype=float)
    b = 0.0
    cost = objective(w, b)  # returned unchanged when n_iter == 0

    # Overflow / invalid-value warnings are expected for diverging settings and
    # are handled explicitly by the finiteness checks below.
    with np.errstate(over="ignore", invalid="ignore"):
        for it in range(n_iter):
            error = y - (X.dot(w) + b)

            # compute gradients of the penalised objective
            dw = (-2/n_samples) * X.T.dot(error) + 2*lam*w
            db = (-2/n_samples) * np.sum(error)

            # gradient clipping (optional)
            if grad_clip is not None:
                dw = np.clip(dw, -grad_clip, grad_clip)
                db = np.clip(db, -grad_clip, grad_clip)

            # update
            w = w - lr * dw
            b = b - lr * db

            # check for NaN or inf in the parameters
            if not (np.all(np.isfinite(w)) and np.isfinite(b)):
                if verbose:
                    print(f"Stopped at iter {it}: non-finite w or b (lr={lr}, lam={lam})")
                return None, None, float('inf'), False  # mark invalid

            # the objective can overflow before the parameters themselves do
            cost = objective(w, b)
            if not np.isfinite(cost):
                if verbose:
                    print(f"Stopped at iter {it}: non-finite cost (lr={lr}, lam={lam})")
                return None, None, float('inf'), False

    # cost already holds the objective at the final (w, b)
    return w, b, cost, True

# ----------------------------
# Run grid safely and collect results
# ----------------------------
# Every (learning rate, lambda) pair is trained on the training split and
# scored with R^2 on the test split.
learning_rates = [0.0001, 0.001, 0.01, 0.1, 1, 10]
lambdas = [1e-15, 1e-10, 1e-5, 1e-3, 0, 1, 10, 20]

results = []
invalid_combos = []

for lr in learning_rates:
    for lam in lambdas:
        # NOTE(review): with grad_clip set, each update is bounded, so a
        # diverging run may stay finite and end up in `results` with a poor
        # R^2 instead of in `invalid_combos` - confirm this is intended.
        w, b, cost, valid = ridge_regression_gd_safe(X_train, y_train, lr=lr, lam=lam, n_iter=2000, grad_clip=1e6, verbose=False)
        if not valid or w is None:
            invalid_combos.append((lr, lam))
            continue  # skip computing r2

        y_pred_test = X_test.dot(w) + b
        if not is_finite_array(y_pred_test):
            invalid_combos.append((lr, lam))
            continue

        r2 = r2_score(y_test, y_pred_test)
        results.append({'Learning Rate': lr, 'Lambda': lam, 'Cost': cost, 'R2_Score': r2})

# Explicit columns keep the frame well-formed when no combo was valid; without
# them an empty frame has no 'R2_Score' column and sort_values raises KeyError
# before the "no valid combos" message can be printed.
df_results = pd.DataFrame(results, columns=['Learning Rate', 'Lambda', 'Cost', 'R2_Score'])
df_results_sorted = df_results.sort_values(by='R2_Score', ascending=False).reset_index(drop=True)

print("Top results (if any):")
print(df_results_sorted.head(10))
print("\nInvalid combos (diverged or produced non-finite values):")
print(invalid_combos)

if not df_results.empty:
    best = df_results_sorted.iloc[0]
    print("\nBest combo:")
    print(best)
else:
    # The dash is written as an escape so the message survives encoding round-trips.
    print("\nNo valid combos found \u2014 try smaller learning rates and/or fewer iterations.")


Top results (if any):
   Learning Rate        Lambda      Cost  R2_Score
0           0.10  0.000000e+00  0.003467  0.949357
1           0.10  1.000000e-15  0.003467  0.949357
2           0.10  1.000000e-10  0.003467  0.949357
3           0.10  1.000000e-05  0.003472  0.949316
4           0.10  1.000000e-03  0.003904  0.945652
5           0.01  0.000000e+00  0.004807  0.926223
6           0.01  1.000000e-15  0.004807  0.926223
7           0.01  1.000000e-10  0.004807  0.926223
8           0.01  1.000000e-05  0.004807  0.926223
9           0.01  1.000000e-03  0.004828  0.926173

Invalid combos (diverged or produced non-finite values):
[]

Best combo:
Learning Rate    0.100000
Lambda           0.000000
Cost             0.003467
R2_Score         0.949357
Name: 0, dtype: float64


In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

# Load the Hitters data and drop rows without a Salary value (the target).
csv_url = "https://git.wur.nl/ridde020/msc-course-machine-learning/-/raw/main/islr_data/Hitters.csv"
df = pd.read_csv(csv_url, index_col=0)
df = df.dropna(subset=["Salary"])

# One-hot encode the object-typed columns, dropping the first level of each.
cat_cols = [c for c in df.columns if df[c].dtype == 'object']
df = pd.get_dummies(df, columns=cat_cols, drop_first=True)

X = df.drop(columns=["Salary"])
y = df["Salary"].values

# 80/20 split; the scaler is fitted on the training rows only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate regressors. RidgeCV no longer asks for the per-alpha CV values:
# they were never read, and the keyword that requested them is deprecated /
# renamed in newer scikit-learn releases. LassoCV uses its default alpha grid
# (the explicit alphas=None only restated the default).
models = {
    "Linear": LinearRegression(),
    "RidgeCV": RidgeCV(alphas=np.logspace(-3, 3, 50)),
    "LassoCV": LassoCV(cv=5, max_iter=5000, random_state=42),
    "RandomForest": RandomForestRegressor(n_estimators=200, random_state=42)
}

# Fit each model on the scaled training data and score it on the test split.
results = []
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))   # RMSE as sqrt of MSE
    alpha = getattr(model, "alpha_", None)  # only the CV models expose alpha_
    results.append({"Model": name, "R2": r2, "RMSE": rmse, "Alpha": alpha})

res_df = pd.DataFrame(results).sort_values(by="R2", ascending=False).reset_index(drop=True)
print(res_df)




          Model        R2        RMSE       Alpha
0  RandomForest  0.459586  312.643245         NaN
1        Linear  0.235397  371.880773         NaN
2       LassoCV  0.209663  378.087091   25.391196
3       RidgeCV  0.199792  380.440909  104.811313


In [6]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.metrics import r2_score, mean_squared_error

# California housing: features as a DataFrame, target as a NumPy array.
data = fetch_california_housing(as_frame=True)
X = data.data
y = data.target.values

# 80/20 split; the scaler is fitted on the training rows only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

alphas = np.logspace(-4, 4, 50)

# RidgeCV searches the explicit grid above. The per-alpha CV values are not
# requested because nothing reads them, and the keyword that requested them is
# deprecated / renamed in newer scikit-learn releases. LassoCV uses its default
# alpha grid (the explicit alphas=None only restated the default).
ridge_cv = RidgeCV(alphas=alphas)
lasso_cv = LassoCV(cv=5, max_iter=5000, random_state=42)

ridge_cv.fit(X_train_s, y_train)
lasso_cv.fit(X_train_s, y_train)

y_pred_ridge = ridge_cv.predict(X_test_s)
y_pred_lasso = lasso_cv.predict(X_test_s)

# Test-set metrics for both models.
ridge_r2 = r2_score(y_test, y_pred_ridge)
lasso_r2 = r2_score(y_test, y_pred_lasso)
ridge_rmse = np.sqrt(mean_squared_error(y_test, y_pred_ridge))
lasso_rmse = np.sqrt(mean_squared_error(y_test, y_pred_lasso))

print(f"RidgeCV alpha: {ridge_cv.alpha_:.6g}, R2: {ridge_r2:.4f}, RMSE: {ridge_rmse:.4f}")
print(f"LassoCV alpha: {lasso_cv.alpha_:.6g}, R2: {lasso_r2:.4f}, RMSE: {lasso_rmse:.4f}")




RidgeCV alpha: 1.75751, R2: 0.5758, RMSE: 0.7455
LassoCV alpha: 0.00079852, R2: 0.5766, RMSE: 0.7448


In [7]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from scipy.special import expit  # sigmoid function

# Load the iris measurements and their class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% for testing, then standardise using training-set statistics only
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Logistic sigmoid, delegated to scipy's expit
def sigmoid(z):
    """Return the element-wise logistic function of ``z``, computed with ``expit``."""
    activation = expit(z)
    return activation

# Binary cross-entropy cost
def compute_cost(y, y_hat):
    """Return the mean binary cross-entropy between labels ``y`` and probabilities ``y_hat``.

    A small constant (1e-15) is added inside both logarithms so that
    probabilities of exactly 0 or 1 do not produce ``log(0)``.
    """
    eps = 1e-15
    m = len(y)
    positive_term = y * np.log(y_hat + eps)
    negative_term = (1 - y) * np.log(1 - y_hat + eps)
    return -(1/m) * np.sum(positive_term + negative_term)

# Binary logistic regression trained with batch gradient descent
def train_binary_logistic(X, y, lr=0.1, n_iter=1000, lam=0.0):
    """Fit a binary logistic-regression model by batch gradient descent.

    Starts from zero weights and a zero intercept and performs ``n_iter``
    full-batch updates with step size ``lr``. ``lam * w`` is added to the
    weight gradient; the intercept is not regularised.

    Returns the tuple ``(w, b)``.
    """
    n_rows, n_cols = X.shape
    w = np.zeros(n_cols)
    b = 0.0

    for _step in range(n_iter):
        # difference between predicted probability and label, per sample
        residual = sigmoid(np.dot(X, w) + b) - y
        grad_w = (1/n_rows) * np.dot(X.T, residual) + lam*w
        grad_b = (1/n_rows) * np.sum(residual)
        w -= lr * grad_w
        b -= lr * grad_b
    return w, b

# One-vs-Rest implementation
def train_ovr(X, y, num_classes, lr=0.1, n_iter=1000, lam=0.0):
    """Train one binary logistic-regression model per class (one-vs-rest).

    For each class index ``c`` in ``range(num_classes)`` the labels are
    binarised (1 where ``y == c``, else 0) and passed to
    ``train_binary_logistic``.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
    y : array-like of integer class labels; converted to an array so that a
        plain list is compared element-wise rather than as a whole object.
    num_classes : int
        Number of classes; labels are expected to be ``0 .. num_classes - 1``.
    lr, n_iter : step size and number of gradient steps for every binary model.
    lam : float, default 0.0
        L2 strength forwarded to ``train_binary_logistic``. The default keeps
        the previous, unregularised behaviour.

    Returns
    -------
    list of ``(w, b)`` tuples, one per class, in class-index order.
    """
    labels = np.asarray(y)
    models = []
    for c in range(num_classes):
        y_binary = np.where(labels == c, 1, 0)
        w, b = train_binary_logistic(X, y_binary, lr, n_iter, lam)
        models.append((w, b))
    return models

def predict_ovr(X, models):
    """Predict a class index per row of ``X`` from one-vs-rest ``(w, b)`` models.

    Each model's sigmoid output is computed for every row, and the index of
    the model with the largest output is returned for that row.
    """
    class_scores = np.array([sigmoid(np.dot(X, w) + b) for w, b in models])
    # rows of class_scores are per-model; transpose to (n_samples, n_models)
    return np.argmax(class_scores.T, axis=1)

# Train models for each class (3 classes)
models = train_ovr(X_train, y_train, num_classes=3, lr=0.1, n_iter=2000)

# Predictions
y_pred = predict_ovr(X_test, models)

# Evaluation
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print("Accuracy:", acc)
print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=iris.target_names))

# Compare with scikit-learn: LogisticRegression wrapped in an explicit
# one-vs-rest meta-estimator. The wrapper replaces the `multi_class='ovr'`
# keyword, which is deprecated in recent scikit-learn releases.
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

lr_model = OneVsRestClassifier(LogisticRegression(solver='lbfgs', max_iter=1000))
lr_model.fit(X_train, y_train)
y_pred_sklearn = lr_model.predict(X_test)
print("Sklearn OvR Accuracy:", accuracy_score(y_test, y_pred_sklearn))


Accuracy: 0.9666666666666667
Confusion Matrix:
 [[10  0  0]
 [ 0  8  1]
 [ 0  0 11]]

Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      0.89      0.94         9
   virginica       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30

Sklearn OvR Accuracy: 0.9666666666666667


