In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_squared_error

In [3]:
np.random.seed(42)
n_samples = 300

# Create correlated features
X1 = np.random.rand(n_samples)
X2 = X1 + np.random.normal(0, 0.02, n_samples)
X3 = X1 * 0.9 + np.random.normal(0, 0.02, n_samples)
X4 = X1 * 1.1 + np.random.normal(0, 0.02, n_samples)
X5 = X2 + X3 + np.random.normal(0, 0.03, n_samples)
X6 = X3 - X1 + np.random.normal(0, 0.01, n_samples)
X7 = X4 + X5 + np.random.normal(0, 0.03, n_samples)

In [4]:
X = np.column_stack((X1, X2, X3, X4, X5, X6, X7))

y = 3*X1 + 2*X2 - 1.5*X3 + 0.5*X4 + np.random.normal(0, 0.1, n_samples)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
def ridge_regression_gd(X, y, lr, lam, iterations=1000):
    m, n = X.shape
    beta = np.zeros(n)
    cost_history = []

    for i in range(iterations):
        y_pred = X.dot(beta)
        error = y_pred - y
        grad = (1/m) * (X.T.dot(error)) + 2 * lam * beta
        beta -= lr * grad
        cost = (1/(2*m)) * np.sum(error**2) + lam * np.sum(beta**2)
        cost_history.append(cost)
    return beta, cost_history[-1]

In [6]:
learning_rates = [0.0001, 0.001, 0.01, 0.1, 1, 10]
lambdas = [1e-15, 1e-10, 1e-5, 1e-3, 0, 1, 10, 20]

results = []

for lr in learning_rates:
    for lam in lambdas:
        beta, cost = ridge_regression_gd(X_train, y_train, lr, lam)
        if np.isnan(cost):  # skip invalid
            continue
        
        y_pred = X_test.dot(beta)
        if np.any(np.isnan(y_pred)):  # skip invalid
            continue

        r2 = r2_score(y_test, y_pred)
        results.append((lr, lam, cost, r2))

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  cost = (1/(2*m)) * np.sum(error**2) + lam * np.sum(beta**2)
  cost = (1/(2*m)) * np.sum(error**2) + lam * np.sum(beta**2)
  numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  cost = (1/(2*m)) * np.sum(error**2) + lam * np.sum(beta**2)
  cost = (1/(2*m)) * np.sum(error**2) + lam * np.sum(beta**2)
  cost = (1/(2*m)) * np.sum(error**2) + lam * np.sum(beta**2)
  grad = (1/m) * (X.T.dot(error)) + 2 * lam * beta


In [7]:
df_results = pd.DataFrame(results, columns=['Learning Rate', 'Lambda', 'Cost', 'R2_Score'])
best_row = df_results.loc[df_results['R2_Score'].idxmax()]
print("\nBest Parameters:")
print(best_row)


Best Parameters:
Learning Rate    0.100000
Lambda           1.000000
Cost             2.432217
R2_Score        -1.990819
Name: 29, dtype: float64


In [8]:
hitters=pd.read_csv("Hitters (1).csv")
print("Initial shape:", hitters.shape)
print(hitters.head())

Initial shape: (322, 20)
   AtBat  Hits  HmRun  Runs  RBI  Walks  Years  CAtBat  CHits  CHmRun  CRuns  \
0    293    66      1    30   29     14      1     293     66       1     30   
1    315    81      7    24   38     39     14    3449    835      69    321   
2    479   130     18    66   72     76      3    1624    457      63    224   
3    496   141     20    65   78     37     11    5628   1575     225    828   
4    321    87     10    39   42     30      2     396    101      12     48   

   CRBI  CWalks League Division  PutOuts  Assists  Errors  Salary NewLeague  
0    29      14      A        E      446       33      20     NaN         A  
1   414     375      N        W      632       43      10   475.0         N  
2   266     263      A        W      880       82      14   480.0         A  
3   838     354      N        E      200       11       3   500.0         N  
4    46      33      N        E      805       40       4    91.5         N  


In [9]:
hitters = hitters.dropna(subset=["Salary"])   # Drop rows where target is missing
hitters.fillna(hitters.mean(numeric_only=True), inplace=True)  # Fill other missing values

for col in hitters.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    hitters[col] = le.fit_transform(hitters[col].astype(str))

X = hitters.drop("Salary", axis=1)
y = hitters["Salary"]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

In [10]:
# Initialize models
linear_model = LinearRegression()
ridge_model = Ridge(alpha=0.5748)
lasso_model = Lasso(alpha=0.5748, max_iter=10000)

# Train models
linear_model.fit(X_train, y_train)
ridge_model.fit(X_train, y_train)
lasso_model.fit(X_train, y_train)

# Predict
y_pred_linear = linear_model.predict(X_test)
y_pred_ridge = ridge_model.predict(X_test)
y_pred_lasso = lasso_model.predict(X_test)

# Evaluate
models = {
    "Linear Regression": (y_pred_linear, linear_model),
    "Ridge Regression": (y_pred_ridge, ridge_model),
    "Lasso Regression": (y_pred_lasso, lasso_model),
}

results = []
for name, (y_pred, model) in models.items():
    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    results.append([name, round(r2, 4), round(mse, 2)])

df_results = pd.DataFrame(results, columns=["Model", "R2_Score", "MSE"])
print(df_results)


               Model  R2_Score        MSE
0  Linear Regression    0.3806  150406.58
1   Ridge Regression    0.4014  145355.63
2   Lasso Regression    0.3960  146667.23


In [11]:
boston=pd.read_csv("Boston_Housing.csv")
print("Initial shape:", boston.shape)
print(boston.head())

Initial shape: (506, 14)
      CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD  TAX  PTRATIO  \
0  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296     15.3   
1  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242     17.8   
2  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242     17.8   
3  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222     18.7   
4  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222     18.7   

        B  LSTAT  MEDV  
0  396.90   4.98  24.0  
1  396.90   9.14  21.6  
2  392.83   4.03  34.7  
3  394.63   2.94  33.4  
4  396.90   5.33  36.2  


In [12]:
boston = pd.read_csv("Boston_Housing.csv")
print("Initial shape:", boston.shape)
print(boston.head())

Initial shape: (506, 14)
      CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD  TAX  PTRATIO  \
0  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296     15.3   
1  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242     17.8   
2  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242     17.8   
3  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222     18.7   
4  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222     18.7   

        B  LSTAT  MEDV  
0  396.90   4.98  24.0  
1  396.90   9.14  21.6  
2  392.83   4.03  34.7  
3  394.63   2.94  33.4  
4  396.90   5.33  36.2  


In [None]:
X = boston.drop(columns=['MEDV'])  # MEDV is the target column
y = boston['MEDV']

# Step 3: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
