In [1]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score


**Problem 1**: Preparation of the data

In [2]:
dataframe1 = pd.read_csv('Walmart.csv')
dataframe1.shape

(6435, 8)

In [3]:
dataframe1=dataframe1.drop(columns=['Store','Date','Holiday_Flag'])
dataframe1.head()

Unnamed: 0,Weekly_Sales,Temperature,Fuel_Price,CPI,Unemployment
0,1643690.9,42.31,2.572,211.096358,8.106
1,1641957.44,38.51,2.548,211.24217,8.106
2,1611968.17,39.93,2.514,211.289143,8.106
3,1409727.59,46.63,2.561,211.319643,8.106
4,1554806.68,46.5,2.625,211.350143,8.106


In [4]:
dataframe1=dataframe1.iloc[:800]
dataframe1.shape

(800, 5)

**Problem 2**: Splitting the dataset

In [5]:
X=np.array(dataframe1).astype(float)
random.shuffle(X)

X_train, X_test = train_test_split(X, test_size=0.4)  # burada train seti std yapabilirsin istersen!!
X_test.shape

(320, 5)

**Problem 3**: Elastic-Net regression

$\min_{x} f(x) = \frac{1}{2} \|Ax - b\|_2^2 + \lambda \|x\|_2^2 + \rho \|x\|_1$

$\nabla f(x) = A^T(Ax - b) + 2\lambda x + \rho \text{sign}(x)$

$\nabla f(x) = 0,  (A^TA + 2\lambda I)x = A^Tb - \rho \text{sign}(x)$

$x = (A^TA + 2\lambda I)^{-1}(A^Tb - \rho \text{sign}(x))$

In [6]:
A=np.hstack((np.ones((X_train.shape[0],1)),X_train[:,1:]))
b=X_train[:,0]    #standardized !!!
b=(b-np.mean(b))/np.std(b) 
A.shape,b.shape

((480, 5), (480,))

In [7]:
#Test:
A_test=np.hstack((np.ones((X_test.shape[0],1)),X_test[:,1:]))
b_test=X_test[:,0]
b_test=(b_test-np.mean(b_test))/np.std(b_test)
A_test.shape,b_test.shape

((320, 5), (320,))

In [8]:
lamda1=0.8
ro1=1-lamda1
#first step all x positive.
x=np.ones(A.shape[1])
x=np.matmul(np.linalg.inv(np.matmul(A.T,A)+2*lamda1*np.eye(A.shape[1])), np.matmul(A.T, b)-ro1*np.sign(x))
print(np.sign(x))
x=np.matmul(np.linalg.inv(np.matmul(A.T,A)+2*lamda1*np.eye(A.shape[1])), np.matmul(A.T, b)-ro1*np.sign(x))
print(np.sign(x)) #on the second attempt all signs are correct.
print(x)

[-1. -1.  1. -1.  1.]
[-1. -1.  1. -1.  1.]
[-0.39754677 -0.0102916   0.58909764 -0.01966459  0.44936581]


x vector is found with $\nabla f(x) = 0$ equation.

In [9]:
train_loss_func=1/2*np.linalg.norm(np.matmul(A,x)-b)**2+lamda1*np.linalg.norm(x)**2+ro1*np.linalg.norm(x,1)
test_loss_func=1/2*np.linalg.norm(np.matmul(A_test,x)-b_test)**2+lamda1*np.linalg.norm(x)**2+ro1*np.linalg.norm(x,1)
train_loss_func,test_loss_func,(train_loss_func/test_loss_func) #there is no n in loss function so train loss>test loss !!

(187.96071066781383, 127.75522800833937, 1.471256508231072)

TEST DATA $R^2Score$:

In [10]:
y_pred=np.matmul(A_test,x)
y_actu=b_test

ss_res=np.sum((y_pred-y_actu)**2)
ss_tot=np.sum((y_actu-np.sum(y_actu)/y_actu.shape)**2)
r2score= 1-ss_res/ss_tot
print("R² score:", r2score)

R² score: 0.20689979145296888


In [11]:
A_all=np.append(A,A_test,axis=0)
y_pred=np.matmul(A_all,x)
y_actu=np.append(b,b_test)
print("R² score:", r2_score(y_actu, y_pred))

R² score: 0.215006126513689


**Problem 4**: Choosing the optimal parameters

In [12]:
lamda1=0.00001
ro1=0.00001
#first step all x positive.
x=np.ones(A.shape[1])
x=np.matmul(np.linalg.inv(np.matmul(A.T,A)+2*lamda1*np.eye(A.shape[1])), np.matmul(A.T, b)-ro1*np.sign(x))
print(np.sign(x))
x=np.matmul(np.linalg.inv(np.matmul(A.T,A)+2*lamda1*np.eye(A.shape[1])), np.matmul(A.T, b)-ro1*np.sign(x))
print(np.sign(x)) #on the second attempt all signs are correct.
print(x)

train_loss_func=1/2*np.linalg.norm(np.matmul(A,x)-b)**2+lamda1*np.linalg.norm(x)**2+ro1*np.linalg.norm(x,1)
test_loss_func=1/2*np.linalg.norm(np.matmul(A_test,x)-b_test)**2+lamda1*np.linalg.norm(x)**2+ro1*np.linalg.norm(x,1)
print(train_loss_func,test_loss_func,(train_loss_func/test_loss_func)) #there is no n in loss function so train loss>test loss !!

[-1. -1.  1. -1.  1.]
[-1. -1.  1. -1.  1.]
[-1.0494488  -0.00976855  0.6642338  -0.01984175  0.50518241]
186.7604906605606 127.0882799965898 1.4695335452299143


Minimum Train and Loss function found in (lamda1=0.00001, ro1=0.00001).

**Problem 5**: Results and comparison

$y_{pred}=A_{test}x$

$y_{actu}=b_{test}$

$MLRegression = \frac{1}{2} \|A_{test}x - b_{test}\|_2^2$

$MSE=\frac{1}{n} \sum_{i=1}^{n}\left((y_{actu}-y_{pred})^2\right)$

$MLRegression =  \frac{1}{2} \|y_{pred} - y_{actu}\|_2^2 =  \frac{1}{2} \sum_{i=1}^{n}\left((y_{actu}-y_{pred})^2\right) = MSE*\frac{n}{2}$


In [13]:
y_pred=np.matmul(A_test,x)
y_actu=b_test

MSE=np.mean((y_pred-y_actu)**2)
MLR=1/2*np.linalg.norm(np.matmul(A_test,x)-b_test)**2
print('MSE:',MSE,'\nML Regression:',MLR,'MLR/MSE:',MLR/MSE)

MSE: 0.794301497058496 
ML Regression: 127.08823952935937 MLR/MSE: 160.00000000000003


**Problem 6**: Another experimental setup

In [14]:
X_train, X_test = train_test_split(X, test_size=0.6)  # burada train seti std yapabilirsin istersen!!

A=np.hstack((np.ones((X_train.shape[0],1)),X_train[:,1:]))
b=X_train[:,0]    #standardized !!!
b=(b-np.mean(b))/np.std(b) 
A_test=np.hstack((np.ones((X_test.shape[0],1)),X_test[:,1:]))
b_test=X_test[:,0]
b_test=(b_test-np.mean(b_test))/np.std(b_test)

lamda1=0.00001
ro1=0.00001
#first step all x positive.
x=np.ones(A.shape[1])
x=np.matmul(np.linalg.inv(np.matmul(A.T,A)+2*lamda1*np.eye(A.shape[1])), np.matmul(A.T, b)-ro1*np.sign(x))
print(np.sign(x))
x=np.matmul(np.linalg.inv(np.matmul(A.T,A)+2*lamda1*np.eye(A.shape[1])), np.matmul(A.T, b)-ro1*np.sign(x))
print(np.sign(x)) #on the second attempt all signs are correct.
print(x)

train_loss_func=1/2*np.linalg.norm(np.matmul(A,x)-b)**2+lamda1*np.linalg.norm(x)**2+ro1*np.linalg.norm(x,1)
test_loss_func=1/2*np.linalg.norm(np.matmul(A_test,x)-b_test)**2+lamda1*np.linalg.norm(x)**2+ro1*np.linalg.norm(x,1)
print(train_loss_func,test_loss_func,(train_loss_func/test_loss_func)) #there is no n in loss function so train loss>test loss !!

y_pred=np.matmul(A_test,x)
y_actu=b_test

MSE=np.mean((y_pred-y_actu)**2)
MLR=1/2*np.linalg.norm(np.matmul(A_test,x)-b_test)**2
print('MSE:',MSE,'\nML Regression:',MLR,'MLR/MSE:',MLR/MSE)

[-1. -1.  1. -1.  1.]
[-1. -1.  1. -1.  1.]
[-0.66565781 -0.00923903  0.64201345 -0.01639249  0.35387869]
128.1908603618097 185.28494986075617 0.6918579218557506
MSE: 0.772020513251182 
ML Regression: 185.28492318028367 MLR/MSE: 239.99999999999997


Difference between 0.4 and 0.6 train set is 0.02 in MSE. Is that means Linear model does not need much data to train??

**Problem 7**: Logistic Regression with $L_1$ penalty

In [15]:
dataframe2 = pd.read_csv('breast-cancer.csv')
dataframe2.shape

(569, 32)

In [16]:
dataframe2=dataframe2.drop(columns=['id'])
dataframe2.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [17]:
X=np.array(dataframe2)[:,1:].astype(float)
Y=np.array([ i=='M' for i in np.array(dataframe2)[:,0]])
AA=np.hstack((X,Y.reshape(-1,1)))
random.shuffle(AA)

X_train, X_test = train_test_split(AA, test_size=0.4)  # burada train seti std yapabilirsin istersen!!
X_test.shape

(228, 31)