
# que1


In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import itertools

np.random.seed(42)
n_samples = 500
X1 = np.random.rand(n_samples, 1) * 10
X2 = X1 + np.random.normal(0, 0.5, (n_samples, 1))
X3 = X1 * 0.5 + np.random.normal(0, 0.3, (n_samples, 1))
X4 = X1 * 2 + np.random.normal(0, 1, (n_samples, 1))
X5 = X1 + X2 + np.random.normal(0, 0.5, (n_samples, 1))
X6 = X3 + X4 + np.random.normal(0, 0.5, (n_samples, 1))
X7 = X1 * 3 + np.random.normal(0, 1, (n_samples, 1))
X = np.hstack((X1, X2, X3, X4, X5, X6, X7))
y = 3*X1 + 2*X2 - X3 + 0.5*X4 + np.random.normal(0, 2, (n_samples, 1))
y = y.ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [3]:
def ridge_regression_gd(X, y, lr=0.01, lambda_=0.1, epochs=1000):
    m, n = X.shape
    X_b = np.c_[np.ones((m, 1)), X]
    theta = np.zeros((n + 1, 1))
    y = y.reshape(-1, 1)
    for _ in range(epochs):
        y_pred = X_b.dot(theta)
        error = y_pred - y
        gradient = (1/m) * (X_b.T.dot(error) + lambda_ * np.r_[[[0]], theta[1:]])
        theta -= lr * gradient
        if np.isnan(theta).any() or np.isinf(theta).any():
            return None
    return theta

def ridge_cost(X, y, theta, lambda_):
    m = len(y)
    X_b = np.c_[np.ones((m, 1)), X]
    y_pred = X_b.dot(theta)
    return (1/(2*m)) * np.sum((y_pred - y.reshape(-1,1))**2) + (lambda_/(2*m)) * np.sum(theta[1:]**2)



In [4]:
learning_rates = [0.0001, 0.001, 0.01]
lambdas = [1e-10, 1e-5, 1e-3, 0, 1, 10, 20]
results = []

for lr, lam in itertools.product(learning_rates, lambdas):
    theta = ridge_regression_gd(X_train, y_train, lr=lr, lambda_=lam, epochs=2000)
    if theta is None:
        continue
    cost = ridge_cost(X_train, y_train, theta, lam)
    X_test_b = np.c_[np.ones((X_test.shape[0], 1)), X_test]
    y_pred = X_test_b.dot(theta)
    if np.isnan(y_pred).any() or np.isinf(y_pred).any():
        continue
    r2 = r2_score(y_test, y_pred)
    results.append((lr, lam, cost, r2))

if results:
    df_results = pd.DataFrame(results, columns=["Learning Rate", "Lambda", "Cost", "R2_Score"])
    best = df_results.loc[df_results["R2_Score"].idxmax()]
    print(df_results.sort_values(by="R2_Score", ascending=False).head(10))
    print("\nBest Parameters:\n", best)
else:
    print("No valid parameter combinations found.")


    Learning Rate        Lambda       Cost  R2_Score
17          0.010  0.000000e+00   2.232516  0.982769
14          0.010  1.000000e-10   2.232516  0.982769
15          0.010  1.000000e-05   2.232517  0.982769
16          0.010  1.000000e-03   2.232580  0.982768
18          0.010  1.000000e+00   2.296009  0.982718
19          0.010  1.000000e+01   2.835258  0.982284
20          0.010  2.000000e+01   3.388439  0.981853
13          0.001  2.000000e+01  10.470367  0.912590
12          0.001  1.000000e+01   9.981860  0.912503
11          0.001  1.000000e+00   9.539018  0.912403

Best Parameters:
 Learning Rate    0.010000
Lambda           0.000000
Cost             2.232516
R2_Score         0.982769
Name: 17, dtype: float64


# que2 

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_squared_error

data = pd.read_csv("C:\\Users\\Win10\\Downloads\\Hitters (1).csv")

data = data.dropna()

categorical_cols = data.select_dtypes(include=['object']).columns
data = pd.get_dummies(data, columns=categorical_cols, drop_first=True)

X = data.drop("Salary", axis=1)
y = data["Salary"]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

linear = LinearRegression()
linear.fit(X_train, y_train)
y_pred_linear = linear.predict(X_test)
r2_linear = r2_score(y_test, y_pred_linear)
mse_linear = mean_squared_error(y_test, y_pred_linear)

ridge = Ridge(alpha=0.5748)
ridge.fit(X_train, y_train)
y_pred_ridge = ridge.predict(X_test)
r2_ridge = r2_score(y_test, y_pred_ridge)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)

lasso = Lasso(alpha=0.5748)
lasso.fit(X_train, y_train)
y_pred_lasso = lasso.predict(X_test)
r2_lasso = r2_score(y_test, y_pred_lasso)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)

results = pd.DataFrame({
    "Model": ["Linear Regression", "Ridge Regression", "Lasso Regression"],
    "R2 Score": [r2_linear, r2_ridge, r2_lasso],
    "MSE": [mse_linear, mse_ridge, mse_lasso]
})

print(results)
best_model = results.loc[results["R2 Score"].idxmax(), "Model"]
print("\nBest Performing Model:", best_model)


               Model  R2 Score            MSE
0  Linear Regression  0.290745  128284.345497
1   Ridge Regression  0.299789  126648.594229
2   Lasso Regression  0.299065  126779.466413

Best Performing Model: Ridge Regression


  model = cd_fast.enet_coordinate_descent(


# que3


In [11]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

data = fetch_california_housing()
X, y = data.data, data.target
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

ridge = RidgeCV(alphas=[1e-15, 1e-10, 1e-5, 1e-3, 0.1, 1, 10, 20], cv=5)
ridge.fit(X_scaled, y)
ridge_pred = ridge.predict(X_scaled)
ridge_r2 = r2_score(y, ridge_pred)

lasso = LassoCV(alphas=[1e-15, 1e-10, 1e-5, 1e-3, 0.1, 1, 10, 20], cv=5, max_iter=10000)
lasso.fit(X_scaled, y)
lasso_pred = lasso.predict(X_scaled)
lasso_r2 = r2_score(y, lasso_pred)

print("Ridge Best Alpha:", ridge.alpha_)
print("Ridge R2 Score:", ridge_r2)
print("Lasso Best Alpha:", lasso.alpha_)
print("Lasso R2 Score:", lasso_r2)


  model = cd_fast.enet_coordinate_descent_gram(


Ridge Best Alpha: 20.0
Ridge R2 Score: 0.6062156400080905
Lasso Best Alpha: 0.001
Lasso R2 Score: 0.6061906372004509


# que4

In [6]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score

iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = OneVsRestClassifier(LogisticRegression(max_iter=1000))
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
acc = accuracy_score(y_test, y_pred)
print("Multiclass Logistic Regression Accuracy:", acc)


Multiclass Logistic Regression Accuracy: 0.9666666666666667
