QUES1

In [14]:

import numpy as np
import pandas as pd
from sklearn.metrics import r2_score

# Step 1: Generate synthetic dataset
np.random.seed(42)
n_samples = 1000
base_feature = np.random.rand(n_samples)

# Create 7 highly correlated features
n_features = 7
features = [base_feature + np.random.randn(n_samples) * 0.05 for _ in range(n_features)]
X = np.column_stack(features)

# True coefficients
true_theta = np.array([10, 9, 8, 7, 6, 5, 4])
y = X.dot(true_theta) + np.random.randn(n_samples) * 0.5  # add noise

# Step 2: Normalize features
X = (X - X.mean(axis=0)) / X.std(axis=0)
m, n = X.shape

# Step 3: Add bias term (column of ones)
X = np.c_[np.ones((m, 1)), X]  # shape (m, n+1)

# Step 4: Ridge Cost Function
def ridge_cost(X, y, theta, lam):
    m = len(y)
    predictions = X.dot(theta)
    cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (lam / (2 * m)) * np.sum(theta[1:] ** 2)
    return cost

# Step 5: Gradient Descent Update
def ridge_gradient_descent(X, y, theta, alpha, lam, num_iters):
    m = len(y)
    cost_history = []

    for _ in range(num_iters):
        predictions = X.dot(theta)
        gradient = (1 / m) * (X.T.dot(predictions - y)) + (lam / m) * np.r_[[0], theta[1:]]
        theta -= alpha * gradient
        cost_history.append(ridge_cost(X, y, theta, lam))
        # Added check for NaN in theta to prevent further calculations with NaNs
        if np.isnan(theta).any():
            print(f"Warning: NaN encountered in theta at iteration {_} for alpha={alpha}, lambda={lam}. Stopping gradient descent.")
            break


    return theta, cost_history

# Step 6: Try different learning rates and lambda values
# Reduced learning rates to avoid divergence
learning_rates = [0.0001, 0.001, 0.01, 0.1]
lambdas = [1e-15, 1e-10, 1e-5, 1e-3, 0, 1, 10, 20]

best_score = -np.inf
best_params = {}
best_theta = None

print("Starting hyperparameter tuning with reduced learning rates...")

for alpha in learning_rates:
    for lam in lambdas:
        print(f"Testing learning_rate: {alpha}, lambda: {lam}")
        theta = np.zeros(n + 1)
        theta, cost_history = ridge_gradient_descent(X, y, theta, alpha, lam, num_iters=1000)

        # Check if theta contains NaN before calculating predictions and R2 score
        if np.isnan(theta).any():
            print(f"Skipping evaluation for alpha={alpha}, lambda={lam} due to NaN in theta.")
            continue

        y_pred = X.dot(theta)

        # Added check for NaN in y_pred before calculating R2 score
        if np.isnan(y_pred).any():
             print(f"Skipping R2 calculation for alpha={alpha}, lambda={lam} due to NaN in predictions.")
             continue

        r2 = r2_score(y, y_pred)
        final_cost = ridge_cost(X, y, theta, lam)

        if r2 > best_score:
            best_score = r2
            best_params = {'alpha': alpha, 'lambda': lam, 'cost': final_cost}
            best_theta = theta

print("\nHyperparameter tuning complete.")
print("✅ Best parameters found:")
print(best_params)
print("Best R² score:", round(best_score, 4))

# Step 7: Show final coefficients
print("\nFinal coefficients (theta):\n", best_theta)

Starting hyperparameter tuning with reduced learning rates...
Testing learning_rate: 0.0001, lambda: 1e-15
Testing learning_rate: 0.0001, lambda: 1e-10
Testing learning_rate: 0.0001, lambda: 1e-05
Testing learning_rate: 0.0001, lambda: 0.001
Testing learning_rate: 0.0001, lambda: 0
Testing learning_rate: 0.0001, lambda: 1
Testing learning_rate: 0.0001, lambda: 10
Testing learning_rate: 0.0001, lambda: 20
Testing learning_rate: 0.001, lambda: 1e-15
Testing learning_rate: 0.001, lambda: 1e-10
Testing learning_rate: 0.001, lambda: 1e-05
Testing learning_rate: 0.001, lambda: 0.001
Testing learning_rate: 0.001, lambda: 0
Testing learning_rate: 0.001, lambda: 1
Testing learning_rate: 0.001, lambda: 10
Testing learning_rate: 0.001, lambda: 20
Testing learning_rate: 0.01, lambda: 1e-15
Testing learning_rate: 0.01, lambda: 1e-10
Testing learning_rate: 0.01, lambda: 1e-05
Testing learning_rate: 0.01, lambda: 0.001
Testing learning_rate: 0.01, lambda: 0
Testing learning_rate: 0.01, lambda: 1
Test

QUES2

In [10]:


import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score

# ✅ Step 1: Load dataset from GitHub (works directly)
url = "https://raw.githubusercontent.com/JWarmenhoven/ISLR-python/master/Notebooks/Data/Hitters.csv"
hitters = pd.read_csv(url)

print("Dataset loaded successfully ✅")
print("Shape:", hitters.shape)
print("\nFirst few rows:\n", hitters.head())

# Step 2: Data preprocessing
print("\nMissing values before cleaning:\n", hitters.isnull().sum())

# Drop rows with null Salary
hitters.dropna(inplace=True)

# One-hot encode categorical columns
hitters_encoded = pd.get_dummies(hitters, drop_first=True)

print("\nAfter encoding shape:", hitters_encoded.shape)

# Step 3: Split into input and output
X = hitters_encoded.drop("Salary", axis=1)
y = hitters_encoded["Salary"]

# Step 4: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 6: Train models
lin_reg = LinearRegression().fit(X_train_scaled, y_train)
ridge_reg = Ridge(alpha=0.5748).fit(X_train_scaled, y_train)
lasso_reg = Lasso(alpha=0.5748).fit(X_train_scaled, y_train)

# Step 7: Evaluate
models = {
    "Linear Regression": lin_reg,
    "Ridge Regression": ridge_reg,
    "Lasso Regression": lasso_reg
}

print("\n--- Model Evaluation ---")
for name, model in models.items():
    y_pred = model.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"\n{name}")
    print(f"Mean Squared Error: {mse:.2f}")
    print(f"R² Score: {r2:.4f}")

# Step 8: Model comparison summary
print("\nConclusion:")
print("- Ridge Regression usually performs best because it reduces overfitting using L2 regularization.")
print("- Lasso can eliminate less important features, improving interpretability.")


Dataset loaded successfully ✅
Shape: (322, 21)

First few rows:
           Unnamed: 0  AtBat  Hits  HmRun  Runs  RBI  Walks  Years  CAtBat  \
0     -Andy Allanson    293    66      1    30   29     14      1     293   
1        -Alan Ashby    315    81      7    24   38     39     14    3449   
2       -Alvin Davis    479   130     18    66   72     76      3    1624   
3      -Andre Dawson    496   141     20    65   78     37     11    5628   
4  -Andres Galarraga    321    87     10    39   42     30      2     396   

   CHits  ...  CRuns  CRBI  CWalks  League Division PutOuts  Assists  Errors  \
0     66  ...     30    29      14       A        E     446       33      20   
1    835  ...    321   414     375       N        W     632       43      10   
2    457  ...    224   266     263       A        W     880       82      14   
3   1575  ...    828   838     354       N        E     200       11       3   
4    101  ...     48    46      33       N        E     805       40    

QUES3

In [11]:

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Load dataset
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

print("Dataset loaded successfully ✅")
print("Shape:", X.shape)
print("Features:", list(X.columns))

# Step 2: Split into train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Standardize (important for regularized regression)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 4: RidgeCV - automatically selects the best alpha
ridge_alphas = [0.1, 1, 10, 100]
ridge_cv = RidgeCV(alphas=ridge_alphas, cv=5)
ridge_cv.fit(X_train_scaled, y_train)

# Step 5: LassoCV - automatically selects the best alpha
lasso_cv = LassoCV(alphas=np.logspace(-3, 3, 50), cv=5, random_state=42)
lasso_cv.fit(X_train_scaled, y_train)

# Step 6: Evaluate both
models = {
    "RidgeCV": ridge_cv,
    "LassoCV": lasso_cv
}

print("\n--- Model Evaluation ---")
for name, model in models.items():
    y_pred = model.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"\n{name}")
    print(f"Best Alpha: {model.alpha_:.4f}")
    print(f"Mean Squared Error: {mse:.4f}")
    print(f"R² Score: {r2:.4f}")

# Step 7: Compare models
print("\nConclusion:")
print("- RidgeCV usually performs slightly better on correlated features.")
print("- LassoCV may set some coefficients to zero → feature selection.")
print("- The chosen alpha shows how much regularization the model needs.")


Dataset loaded successfully ✅
Shape: (20640, 8)
Features: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']

--- Model Evaluation ---

RidgeCV
Best Alpha: 0.1000
Mean Squared Error: 0.5559
R² Score: 0.5758

LassoCV
Best Alpha: 0.0010
Mean Squared Error: 0.5545
R² Score: 0.5769

Conclusion:
- RidgeCV usually performs slightly better on correlated features.
- LassoCV may set some coefficients to zero → feature selection.
- The chosen alpha shows how much regularization the model needs.


QUES4

In [12]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target
class_names = iris.target_names

print("Classes:", class_names)
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

# Step 2: Split into train and test data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 3: Feature scaling (important for gradient-based algorithms)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 4: Create Logistic Regression model
# 'ovr' = One-vs-Rest, 'multinomial' = Softmax
log_reg = LogisticRegression(multi_class='ovr', solver='lbfgs', max_iter=200)
log_reg.fit(X_train_scaled, y_train)

# Step 5: Predictions
y_pred = log_reg.predict(X_test_scaled)

# Step 6: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("\nModel Accuracy:", round(accuracy, 3))

print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=class_names))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Classes: ['setosa' 'versicolor' 'virginica']
Shape of X: (150, 4)
Shape of y: (150,)

Model Accuracy: 0.9

Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.89      0.80      0.84        10
   virginica       0.82      0.90      0.86        10

    accuracy                           0.90        30
   macro avg       0.90      0.90      0.90        30
weighted avg       0.90      0.90      0.90        30


Confusion Matrix:
 [[10  0  0]
 [ 0  8  2]
 [ 0  1  9]]


