In [9]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

np.random.seed(42)
n_samples = 500
X1 = np.random.rand(n_samples)
X2 = X1 + np.random.normal(0, 0.01, n_samples)
X3 = X2 + np.random.normal(0, 0.01, n_samples)
X4 = 2*X1 + np.random.normal(0, 0.01, n_samples)
X5 = 3*X2 + np.random.normal(0, 0.01, n_samples)
X6 = X3 + X4 + np.random.normal(0, 0.01, n_samples)
X7 = X5 - X1 + np.random.normal(0, 0.01, n_samples)
y = 5*X1 + 3*X2 + 2*X3 + np.random.normal(0, 0.05, n_samples)

X = np.column_stack((X1, X2, X3, X4, X5, X6, X7))

# Normalize features
X = (X - X.mean(axis=0)) / X.std(axis=0)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def ridge_regression(X, y, lr, lam, epochs=1000):
    m, n = X.shape
    X_b = np.c_[np.ones((m, 1)), X]
    theta = np.zeros((n + 1, 1))
    y = y.reshape(-1, 1)
    for _ in range(epochs):
        y_pred = X_b.dot(theta)
        error = y_pred - y
        grad = (1/m) * X_b.T.dot(error) + (lam/m)*np.r_[[[0]], theta[1:]]
        theta -= lr * grad
        if np.isnan(theta).any() or np.isinf(theta).any():
            return None, np.inf
    y_pred = X_b.dot(theta)
    cost = (1/(2*m))*np.sum((y_pred - y)**2) + (lam/(2*m))*np.sum(theta[1:]**2)
    return theta, cost

learning_rates = [0.0001, 0.001, 0.01, 0.1, 1, 10]
lambdas = [1e-15, 1e-10, 1e-5, 1e-3, 0, 1, 10, 20]

best_params = None
best_r2 = -np.inf
best_cost = np.inf

for lr in learning_rates:
    for lam in lambdas:
        theta, cost = ridge_regression(X_train, y_train, lr, lam)
        if theta is None or np.isinf(cost) or np.isnan(cost):
            continue
        X_test_b = np.c_[np.ones((X_test.shape[0], 1)), X_test]
        y_pred = X_test_b.dot(theta).ravel()
        if np.isnan(y_pred).any() or np.isinf(y_pred).any():
            continue
        r2 = r2_score(y_test, y_pred)
        if np.isnan(r2):
            continue
        if r2 > best_r2 or (r2 == best_r2 and cost < best_cost):
            best_r2 = r2
            best_cost = cost
            best_params = (lr, lam)

print("Best Learning Rate:", best_params[0])
print("Best Lambda:", best_params[1])
print("Minimum Cost:", best_cost)
print("Maximum R2 Score:", best_r2)

Best Learning Rate: 0.1
Best Lambda: 0
Minimum Cost: 0.0015561881836372283
Maximum R2 Score: 0.9996665462486102


In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

url = 'https://drive.google.com/uc?id=1qzCKF6JKKMB0p7ul_lLy8tdmRk3vE_bG'
df = pd.read_csv(url)

df = df.dropna(subset=['Salary'])
df = pd.get_dummies(df, columns=['League','Division','NewLeague'], drop_first=True)

X = df.drop(columns=['Salary'])
y = df['Salary']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

lr = LinearRegression()
ridge = Ridge(alpha=0.5748)
lasso = Lasso(alpha=0.5748)

lr.fit(X_train_scaled, y_train)
ridge.fit(X_train_scaled, y_train)
lasso.fit(X_train_scaled, y_train)

y_pred_lr = lr.predict(X_test_scaled)
y_pred_ridge = ridge.predict(X_test_scaled)
y_pred_lasso = lasso.predict(X_test_scaled)

models = {'Linear': y_pred_lr, 'Ridge': y_pred_ridge, 'Lasso': y_pred_lasso}

for name, y_pred in models.items():
    r2 = r2_score(y_test, y_pred)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    mae = mean_absolute_error(y_test, y_pred)
    print(name, 'R2:', r2, 'RMSE:', rmse, 'MAE:', mae)


HTTPError: HTTP Error 404: Not Found

In [12]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

data = fetch_california_housing()
X, y = data.data, data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

ridge_cv = RidgeCV(alphas=[0.1, 1, 10, 100], cv=5)
lasso_cv = LassoCV(alphas=[0.001, 0.01, 0.1, 1, 10], cv=5, max_iter=10000)

ridge_cv.fit(X_train_scaled, y_train)
lasso_cv.fit(X_train_scaled, y_train)

y_pred_ridge = ridge_cv.predict(X_test_scaled)
y_pred_lasso = lasso_cv.predict(X_test_scaled)

for name, model, y_pred in [
    ('RidgeCV', ridge_cv, y_pred_ridge),
    ('LassoCV', lasso_cv, y_pred_lasso)
]:
    r2 = r2_score(y_test, y_pred)
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    mae = mean_absolute_error(y_test, y_pred)
    print(name, 'Best alpha:', model.alpha_, 'R2:', r2, 'RMSE:', rmse, 'MAE:', mae)


RidgeCV Best alpha: 10.0 R2: 0.5959440604913049 RMSE: 0.7282442079450921 MAE: 0.5272132899793829
LassoCV Best alpha: 0.001 R2: 0.5963975777208825 RMSE: 0.7278353981857225 MAE: 0.5273050861894018


In [13]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

iris = load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = LogisticRegression(multi_class='ovr', solver='lbfgs', max_iter=200)
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.8444444444444444
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.79      0.73      0.76        15
           2       0.75      0.80      0.77        15

    accuracy                           0.84        45
   macro avg       0.85      0.84      0.84        45
weighted avg       0.85      0.84      0.84        45

Confusion Matrix:
 [[15  0  0]
 [ 0 11  4]
 [ 0  3 12]]


