In [2]:
# Mahdi Anvari 610700002
import numpy as np
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.metrics import mean_squared_error,r2_score

In [4]:
# question 1.a
# It generates synthetic data for a linear regression problem : 
# a design matrix X, a coefficient vector Beta, and adds noise to simulate a response vector Y.

def GenerateData(n, p,q):
    X = np.random.normal(size=(n, p))
    Beta = np.random.normal(size=(p, 1))
    SelectedColumns = np.random.choice(p, q, replace=False)
    SelectedX = X[:, SelectedColumns]
    SelectedBeta = Beta[SelectedColumns]
    NewBeta = np.zeros_like(Beta)
    NewBeta[SelectedColumns] = SelectedBeta
    Epsilon = np.random.normal(scale=0.05, size=(n, 1))
    Y = X.dot(NewBeta) + Epsilon
    return(Y,X,NewBeta)

Data = GenerateData(100,10,8)
y = Data[0]
x = Data[1]
Beta = Data[2]
print(Beta)
x_train = x[:80,]
x_test = x[80:,]
y_train = y[:80]
y_test = y[80:]
lr = LinearRegression()
lr.fit(x_train,y_train)
y_pred = lr.predict(x_test)
mse = mean_squared_error(y_test,y_pred)
r2 = r2_score(y_test,y_pred)
print(mse)
print(r2)

[[ 0.        ]
 [ 0.        ]
 [-0.5732294 ]
 [ 1.55516843]
 [-2.09398694]
 [-0.5697091 ]
 [-1.16876101]
 [ 0.84267817]
 [ 1.65243229]
 [ 1.20983559]]
0.0014815896486944794
0.9998992643324027


In [3]:
# question 1.b
# It computes the coefficients of a linear regression model using the closed-form solution

def ClosedForm(X, Y):
    Beta = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(Y)
    return(Beta)

print(ClosedForm(x,y))

[[ 1.80277949]
 [-0.24605351]
 [-0.00296861]
 [ 0.02103424]
 [ 0.36499013]]


In [4]:
# question 1.c
# It performs gradient descent optimization to estimate the coefficients of a linear regression model

def GradientDescent(X,Y,Alpha=0.001,TH=0.0001,MaxIter=10000):
    n = np.shape(X)[1]
    p = np.shape(X)[1]
    Beta = np.zeros((p, 1))
    PrevCost = float('inf')
    for _ in range(MaxIter):
        PredY = X.dot(Beta)
        Error = PredY - Y
        Cost = np.sum(Error ** 2)/2
        Gradient = X.T.dot(Error)
        Beta -= Alpha * Gradient
        if abs(Cost-PrevCost) < TH:
            break
        PrevCost = Cost
    return(Beta)

print(GradientDescent(x,y))

[[ 1.73727378]
 [-0.21001671]
 [-0.02304932]
 [ 0.03147785]
 [ 0.30811234]]


In [5]:
# question 1.d
# It utilizes scikit-learn's LinearRegression model to estimate the coefficients of a linear regression model

def SciKitLinearReg(X,Y):
    n = np.shape(X)[1]
    Model = LinearRegression()
    Model.fit(X,Y)
    return np.array(Model.coef_).reshape(n,1)

print(SciKitLinearReg(x,y))

[[ 1.81845799e+00]
 [-2.46773301e-01]
 [-1.01382127e-03]
 [ 2.55283818e-02]
 [ 3.69818868e-01]]


In [6]:
# question 1.e
# It implements the LASSO regression algorithm
# Within the function, both X and Y are normalized
# In each iteration, it updates the coefficient estimates Beta by considering the L1 penalty term and the gradient of the cost function

def NormalizeVector(vector):
    mean = np.mean(vector)
    normalized_vector = vector - mean
    norm = np.linalg.norm(normalized_vector)
    return normalized_vector / norm

def NormalizeMatrix(matrix):
    col_means = np.mean(matrix, axis=0)
    normalized_matrix = matrix - col_means
    col_norms = np.linalg.norm(normalized_matrix, axis=0)
    return normalized_matrix / col_norms

def LASSOreg(X,Y,Lambda=0.0001,TH=0.0001,MaxIter=1000):
    X = NormalizeMatrix(X)
    Y = NormalizeMatrix(Y)
    p = np.shape(X)[1]
    Beta = np.zeros((p, 1))
    PrevCost = float('inf')
    for _ in range(MaxIter):
        for i in range(p):
            PredY = X.dot(Beta)
            XK = (X[:, i])
            Error = Y - PredY + (XK*Beta[i]).reshape(10,1)
            NewXK = (XK.T.reshape(10,1))
            Gradient = NewXK.T.dot(Error)
            if Gradient < -Lambda:
                Beta[i] = (Gradient + Lambda)
            elif Gradient > Lambda:
                Beta[i] = (Gradient - Lambda)
            else:
                Beta[i] = 0
        Cost = np.sum(Error ** 2) / 2
        if abs(Cost-PrevCost) < TH:
            break
        PrevCost = Cost
    return(Beta)

print(LASSOreg(x,y))

[[ 1.21446822e+00]
 [-2.28894495e-01]
 [-8.59626072e-04]
 [ 2.47606844e-02]
 [ 3.58627606e-01]]


In [7]:
# question 1.f
# It utilizes scikit-learn's Lasso model to perform LASSO regression

def SciKitLASSO(X,Y):
    n = np.shape(X)[1]
    Model = Lasso(alpha=0.001 , max_iter=1000)
    Model.fit(X,Y)
    return np.array(Model.coef_).reshape(n,1)

print(SciKitLASSO(x,y))

[[ 1.81229170e+00]
 [-2.44645290e-01]
 [-4.31411920e-04]
 [ 2.43587663e-02]
 [ 3.66703533e-01]]


In [8]:
# question 2

# question 2.a

![question 2.a](C:/Users/M/Desktop/2aa.jpg)
![question 2.a](C:/Users/M/Desktop/2ab.jpg)

# question 2.b

![question 2.a](C:/Users/M/Desktop/2ba.jpg)

In [None]:
# supplementary data 1

In [None]:
# supplementary data 2

![](C:/Users/M/Desktop/Screenshot(3510).png)
![](C:/Users/M/Desktop/Screenshot(3511).png)