In [72]:
import pandas as pd
import numpy as np

### Loading Data

In [74]:
df = pd.read_csv("C:/Users/birolduru/Downloads/Fish.csv") 

In [75]:
df.head()

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
0,Bream,242.0,23.2,25.4,30.0,11.52,4.02
1,Bream,290.0,24.0,26.3,31.2,12.48,4.3056
2,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961
3,Bream,363.0,26.3,29.0,33.5,12.73,4.4555
4,Bream,430.0,26.5,29.0,34.0,12.444,5.134


### Append two random columns

In [77]:
df["Rand1"] = np.random.uniform(low=10, high=100, size=(df.shape[0],)) #since it is not specified which distribution to use in hw pdf, I prefer uniform.
df["Rand2"] = np.random.randint(low=1, high=7, size=(df.shape[0],))

In [78]:
df.head()

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width,Rand1,Rand2
0,Bream,242.0,23.2,25.4,30.0,11.52,4.02,50.956662,2
1,Bream,290.0,24.0,26.3,31.2,12.48,4.3056,12.9035,6
2,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961,31.944006,2
3,Bream,363.0,26.3,29.0,33.5,12.73,4.4555,88.22903,5
4,Bream,430.0,26.5,29.0,34.0,12.444,5.134,23.135758,2


In [79]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score


### Model 1

In [81]:
X1 = df.drop(["Weight", "Species"], axis=1) #species column is categorical that's why it is removed.
y1 = df["Weight"] #target column

model1 = LinearRegression()
model1.fit(X1, y1)

y_pred1 = model1.predict(X1)
r2_1 = r2_score(y1, y_pred1)

print("Model 1 R2:", r2_1)
print("Coefficients:", model1.coef_)
print("Intercept:", model1.intercept_)


Model 1 R2: 0.8858492728407967
Coefficients: [ 63.36764206  -6.18946238 -30.17223712  28.786984    21.09949099
   0.27528563   2.95567952]
Intercept: -524.1146599542952


### Model 2

In [83]:
X2 = df.drop(["Weight", "Species", "Rand1"], axis=1) #drop rand1 column also
y2 = df["Weight"]

model2 = LinearRegression()
model2.fit(X2, y2)

y_pred2 = model2.predict(X2)
r2_2 = r2_score(y2, y_pred2)

print("Model 2 R2:", r2_2)
print("Coefficients:", model2.coef_)
print("Intercept:", model2.intercept_)


Model 2 R2: 0.8854379016162425
Coefficients: [ 62.34895461  -6.1687477  -29.39083618  28.43359132  22.40094351
   2.49727536]
Intercept: -508.00190619611203


### Model 3

In [85]:
X3 = df.drop(["Weight", "Species", "Rand1", "Rand2"], axis=1) #drop rand2 column too
y3 = df["Weight"]

model3 = LinearRegression()
model3.fit(X3, y3)

y_pred3 = model3.predict(X3)
r2_3 = r2_score(y3, y_pred3)

print("Model 3 R2:", r2_3)
print("Coefficients:", model3.coef_)
print("Intercept:", model3.intercept_)


Model 3 R2: 0.8852867046546207
Coefficients: [ 62.35521443  -6.52675249 -29.02621861  28.29735132  22.47330665]
Intercept: -499.5869553569422


### Model 4

In [87]:
df_top2 = df.head(2) #just top two recording

X4 = df_top2.drop(["Weight", "Species"], axis=1)
y4 = df_top2["Weight"]

model4 = LinearRegression()
model4.fit(X4, y4)

y_pred4 = model4.predict(X4)
r2_4 = r2_score(y4, y_pred4)

print("Model 4 R2:", r2_4)
print("Coefficients:", model4.coef_)
print("Intercept:", model4.intercept_)


Model 4 R2: 1.0
Coefficients: [ 0.02615917  0.02942907  0.03923876  0.03139101  0.00933882 -1.24429907
  0.13079587]
Intercept: 302.21301477913227


### Comparison of Models

Dropping Rand1 and Rand2 columns nearly didn't change model's performance. This means they are not related with target variable. In model 4, R2 score is 1 because of lack of training data. We just have 2 rows and this leads to overfitting (high variance and low bias). We also see that length1 is the most related variable with target variable

### Optimization Task

In [91]:
from scipy.optimize import minimize

X_opt = df.drop(["Weight", "Species"], axis=1).values #those are matrix anymore not df
y_opt = df["Weight"].values #matrix

X_opt2 = np.column_stack([np.ones(len(X_opt)), X_opt]) #We are adding columns of 1 for intercept term



In [92]:
def loss_function(beta, X, y): #This is our objective function, beta is our coefficients, x features , Y prediction
    y_pred = X.dot(beta) #matrix multiplication (dot product)
    return np.sum((y - y_pred)**2) #sum of squared errors


In [93]:
beta0 = np.zeros(X_opt2.shape[1]) #to use minimize function, we have to give initial values for our beta values

result = minimize(loss_function, beta0, args=(X_opt2, y_opt))
beta_opt = result.x
beta_opt

print("Optimized Coefficients:")
np.set_printoptions(suppress=True) 
print(beta_opt)



Optimized Coefficients:
[-524.11488548   63.36722055   -6.18968393  -30.17169241   28.78667872
   21.10026725    0.27528442    2.95566604]


### Optimization Report

As you see, intercepts and coefficients of Model 1 and Scipy Optimization Model are same. Predictions of two model are equivalent

### Regularization Task

### Lasso with Upper Bound

In [98]:
X = df.drop(["Weight", "Species"], axis=1).values
y = df["Weight"].values

X2 = np.column_stack([np.ones(len(X)), X])

In [99]:
def loss(beta):
    y_pred = X2.dot(beta)
    return np.sum((y - y_pred)**2)


In [100]:
bounds = [(-1000, 1000)] * X2.shape[1]
beta0 = np.zeros(X2.shape[1])

res_lasso_bound = minimize(loss, beta0, bounds=bounds)
beta_lasso_bound = res_lasso_bound.x

print("Lasso with upper bound coefficients:")
np.set_printoptions(suppress=True) # e'li notasyonu önlemek için.
print(beta_lasso_bound)


Lasso with upper bound coefficients:
[-524.11438216   63.34356722   -6.16714892  -30.17151789   28.78536178
   21.09733818    0.27529307    2.95526631]


In [101]:
bounds = [(-100, 100)] * X2.shape[1]
beta0 = np.zeros(X2.shape[1])

res_lasso_bound2 = minimize(loss, beta0, bounds=bounds)
beta_lasso_bound2 = res_lasso_bound2.x

print("Bound = 100 coefficients:")
np.set_printoptions(suppress=True)
print(beta_lasso_bound2)


Bound = 100 coefficients:
[-100.          100.          -30.20097858  -43.82726561   35.70066696
   -0.77306549   -1.75935733  -26.5877338 ]


In [102]:
bounds = [(-50, 50)] * X2.shape[1]
beta0 = np.zeros(X2.shape[1])

res_lasso_bound3 = minimize(loss, beta0, bounds=bounds)
beta_lasso_bound3 = res_lasso_bound3.x

print("Bound = 50 coefficients:")
np.set_printoptions(suppress=True)
print(beta_lasso_bound3)


Bound = 50 coefficients:
[-50.          50.          17.50705848 -44.60386343  33.05978478
  -3.39321941  -2.05164393 -30.60129988]


When the upper bound is 1000, the coefficients are nearly identical to those in Model 1, but as the upper bound is reduced, the coefficients move closer to zero.

### Lasso with Penalty

In [105]:
def loss_l1(beta, lam): #Objective function with lasso penalty term
    y_pred = X2.dot(beta)
    return np.sum((y - y_pred)**2) + lam * np.sum(np.abs(beta))


In [106]:
lam = 0.0001
beta0 = np.zeros(X2.shape[1])

res_lasso_l1 = minimize(lambda b: loss_l1(b, lam), beta0)
beta_lasso_l1 = res_lasso_l1.x

print("Lasso with Lagrange penalty coefficients:")
np.set_printoptions(suppress=True)
print(beta_lasso_l1)


Lasso with Lagrange penalty coefficients:
[-524.11526375   63.36787519   -6.18970285  -30.17220396   28.78699425
   21.09944324    0.27528881    2.95573076]


In [107]:
lam = 1
beta0 = np.zeros(X2.shape[1])

res_lasso_l1_2 = minimize(lambda b: loss_l1(b, lam), beta0)
beta_lasso_l1_2 = res_lasso_l1_2.x

print("Lasso with Lagrange penalty coefficients (lambda = 1):")
np.set_printoptions(suppress=True)
print(beta_lasso_l1_2)


Lasso with Lagrange penalty coefficients (lambda = 1):
[-524.06712739   63.26730166   -6.08976487  -30.17617591   28.7836119
   21.08667315    0.27499359    2.9507305 ]


In [108]:
lam = 100
beta0 = np.zeros(X2.shape[1])

res_lasso_l1_3 = minimize(lambda b: loss_l1(b, lam), beta0)
beta_lasso_l1_3 = res_lasso_l1_3.x

print("Lasso with Lagrange penalty coefficients (lambda = 100):")
np.set_printoptions(suppress=True)
print(beta_lasso_l1_3)


Lasso with Lagrange penalty coefficients (lambda = 100):
[-519.08770259   56.57990272   -0.00000001  -29.91895649   28.35425533
   20.40427869    0.24720703    2.45637437]


With λ = 0.0001, the regularization impact is minimal. The coefficients decrease only slightly and stay very close to those of Model 1. Increasing λ would make the regularization effect much stronger.

### Lasso with Sklearn

In [111]:
from sklearn.linear_model import Lasso

lasso_sklearn = Lasso(alpha=0.0001)
lasso_sklearn.fit(df.drop(["Weight", "Species"], axis=1), y)

print("Sklearn Lasso coefficients:")
print(lasso_sklearn.coef_)
print("Intercept:", lasso_sklearn.intercept_)


Sklearn Lasso coefficients:
[ 61.29984105  -7.54448443 -27.11730964  27.34854417  23.67570155
   0.27035532   2.90695315]
Intercept: -524.7616159298939


  model = cd_fast.enet_coordinate_descent(


Sklearn’s Lasso scales the squared error by (2 * n_samples). Because of this difference in the loss function, we must divide the alpha parameter by (2 * n_samples) to match the results of our earlier Lasso implementations.