In [65]:
import numpy as np
import pandas as pd

## Data preprocessing


In [66]:
df = pd.read_csv('Boston.csv')

In [67]:
df.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [68]:
df.isna().sum()

crim       0
zn         0
indus      0
chas       0
nox        0
rm         0
age        0
dis        0
rad        0
tax        0
ptratio    0
black      0
lstat      0
medv       0
dtype: int64

In [69]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [70]:
# normal the data
from sklearn.preprocessing import MinMaxScaler

# initize a scaler
scaler_X = MinMaxScaler()

# fit and transform
X_scaled = scaler_X.fit_transform(X)


In [71]:
# split the dataset
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_scaled,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=0)

## Subgradient

In [29]:
from weighted_elastic_net import WEN

# subgradient, W = I
net1 = WEN(step_size = 1e-3, max_iter = 5000)
net1.fit(X_train, y_train)
y_predict_1 = net1.predict(X_test)
RSS_1 = np.linalg.norm(y_test - y_predict_1)/len(y_test)

In [28]:
# CD, W= I
net2 = WEN(step_size = 1e-3, max_iter = 5000)
net2.fit_CD(X_train, y_train)
y_predict_2 = net2.predict(X_test)
RSS_2 = np.linalg.norm(y_test - y_predict_2)/len(y_test)

In [30]:
print(RSS_1, RSS_2)

0.946914681431 0.947256632439


In [31]:
net1._best_coeff

array([-12.60711218,   7.32337907,   4.10704478,   3.90667457,
        -1.74532829,  52.68382456,   3.44082533,  -3.67697622,
         5.30194624,  -7.31781608,  -7.81054973,  14.89886993, -15.75074495])

In [32]:
net2._best_coeff

array([-12.62253565,   7.28702861,   4.00325255,   3.92329132,
        -1.62562859,  52.73443939,   3.34951865,  -3.62740807,
         5.21167598,  -7.21860217,  -7.78709832,  14.861909  , -15.61795205])

## Adding Weight

In [35]:
weight3 = np.diag(1/(y_train - net1.predict(X_train))**2)

In [36]:
net3 = WEN(step_size = 1e-3, weight=weight3, max_iter=5000)
net3.fit(X_train, y_train)
y_predict_3 = net3.predict(X_test)
RSS_3 = np.linalg.norm(y_test - y_predict_3)/len(y_test)

In [37]:
RSS_3

1.3071567423839336

In [38]:
net3._best_coeff

array([ -7.77408554,   0.59823645,  -1.69042564,  20.53776962,
         9.14129615,  34.48136401,  10.52029918,   4.1120503 ,
         1.313977  ,  -7.38204178,   5.38186539,  16.14760847, -23.52555606])

In [39]:
weight4 = np.diag(1/(y_train - net2.predict(X_train))**2)

net4 = WEN(step_size = 1e-3, weight=weight4, max_iter=5000)
net4.fit(X_train, y_train)
y_predict_4 = net4.predict(X_test)
RSS_4 = np.linalg.norm(y_test - y_predict_4)/len(y_test)

In [40]:
RSS_4

1.2930165220793657

In [41]:
net4._best_coeff

array([ -7.44212451,   0.67029348,  -1.54704442,  20.10449748,
         8.60855211,  34.29468936,  10.53667657,   4.11518011,
         1.11864981,  -7.14266098,   5.61024793,  15.97472913, -23.38301629])

## Optimization

In [78]:
l1 = np.linspace(0.2,1,5)
l2 = np.linspace(0.2,1,5)
xv, yv = np.meshgrid(l1, l2)

In [83]:
best_coeffs = []
smallest_RSS = np.inf
y_predicts = []
best_pairs = None

for i in zip(xv.flatten(), yv.flatten()):
    net = WEN(l1 = i[0], l2 = i[1], step_size = 1e-3, max_iter = 5000)
    # fit the data
    net.fit(X_train, y_train)
    # store the best coeff found
    best_coeffs.append(net._best_coeff)
    # make prediciton
    y_predict = net.predict(X_test)
    # store the prediction
    y_predicts.append(y_predict)
    # current error
    current_RSS = np.linalg.norm(y_test - y_predict)/len(y_test)
    
    if (current_RSS < smallest_RSS):
        best_pairs = i
        smallest_RSS = current_RSS
    

In [84]:
smallest_RSS

0.53341888835388207

In [85]:
best_pairs

(1.0, 0.20000000000000001)