In [1]:
dtype_dict = {'bathrooms':float, 'waterfront':int, 'sqft_above':int, 'sqft_living15':float, 'grade':int, 'yr_renovated':int, 'price':float, 'bedrooms':float, 'zipcode':str, 'long':float, 'sqft_lot15':float, 'sqft_living':float, 'floors':float, 'condition':int, 'lat':float, 'date':str, 'sqft_basement':int, 'yr_built':int, 'id':str, 'sqft_lot':int, 'view':int}

<h1>Feature Selection and LASSO Regression</h1>
In this lession, we will use LASSO to select features, building on a pre-implemented solver for LASSO.

Learning objectives:

Run LASSO with different L1 penalties.
Choose best L1 penalty using a validation set.
Choose best L1 penalty using a validation set, with additional constraint on the size of subset.
In addition, we also will implement your own LASSO solver using coordinate descent.

We will continue to use the House data from previous notebooks.

In [48]:
import pandas as pd

dtype_dict = {'bathrooms':float, 'waterfront':int, 'sqft_above':int, 'sqft_living15':float, 'grade':int, 'yr_renovated':int, 'price':float, 'bedrooms':float, 'zipcode':str, 'long':float, 'sqft_lot15':float, 'sqft_living':float, 'floors':float, 'condition':int, 'lat':float, 'date':str, 'sqft_basement':int, 'yr_built':int, 'id':str, 'sqft_lot':int, 'view':int}

sales = pd.read_csv('kc_house_data.csv', dtype=dtype_dict)
sales.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3.0,1.0,1180.0,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340.0,5650.0
1,6414100192,20141209T000000,538000.0,3.0,2.25,2570.0,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690.0,7639.0
2,5631500400,20150225T000000,180000.0,2.0,1.0,770.0,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720.0,8062.0
3,2487200875,20141209T000000,604000.0,4.0,3.0,1960.0,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360.0,5000.0
4,1954400510,20150218T000000,510000.0,3.0,2.0,1680.0,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800.0,7503.0


In [49]:
from math import log, sqrt
sales['sqft_living_sqrt'] = sales['sqft_living'].apply(sqrt)
sales['sqft_lot_sqrt'] = sales['sqft_lot'].apply(sqrt)
sales['bedrooms_square'] = sales['bedrooms']*sales['bedrooms']
sales['floors_square'] = sales['floors']*sales['floors']

<p>Squaring bedrooms will increase the separation between not many bedrooms (e.g. 1) and lots of bedrooms (e.g. 4) since 1^2 = 1 but 4^2 = 16. Consequently this variable will mostly affect houses with many bedrooms.</p>
    
<p>On the other hand, taking square root of sqft_living will decrease the separation between big house and small house. The owner may not be exactly twice as happy for getting a house that is twice as big.</p>

Using the entire house dataset, learn regression weights using an L1 penalty of 5e2. Make sure to add "normalize=True" when creating the Lasso object. Refer to the following code snippet for the list of features

In [5]:
from sklearn import linear_model  # using scikit-learn

all_features = ['bedrooms', 'bedrooms_square',
            'bathrooms',
            'sqft_living', 'sqft_living_sqrt',
            'sqft_lot', 'sqft_lot_sqrt',
            'floors', 'floors_square',
            'waterfront', 'view', 'condition', 'grade',
            'sqft_above',
            'sqft_basement',
            'yr_built', 'yr_renovated']

model_all = linear_model.Lasso(alpha=5e2, normalize=True) # set parameters
model_all.fit(sales[all_features], sales['price']) # learn weights

Lasso(alpha=500.0, normalize=True)

In [13]:
for i in range(0,len(model_all.coef_)):
    print(all_features[i],":",model_all.coef_[i])

bedrooms : 0.0
bedrooms_square : 0.0
bathrooms : 0.0
sqft_living : 134.43931395541438
sqft_living_sqrt : 0.0
sqft_lot : 0.0
sqft_lot_sqrt : 0.0
floors : 0.0
floors_square : 0.0
waterfront : 0.0
view : 24750.004585609488
condition : 0.0
grade : 61749.10309070811
sqft_above : 0.0
sqft_basement : 0.0
yr_built : -0.0
yr_renovated : 0.0


In [6]:
testing = pd.read_csv('wk3_kc_house_test_data.csv', dtype=dtype_dict)
training = pd.read_csv('wk3_kc_house_train_data.csv', dtype=dtype_dict)
validation = pd.read_csv('wk3_kc_house_valid_data.csv', dtype=dtype_dict)

In [7]:
testing['sqft_living_sqrt'] = testing['sqft_living'].apply(sqrt)
testing['sqft_lot_sqrt'] = testing['sqft_lot'].apply(sqrt)
testing['bedrooms_square'] = testing['bedrooms']*testing['bedrooms']
testing['floors_square'] = testing['floors']*testing['floors']

training['sqft_living_sqrt'] = training['sqft_living'].apply(sqrt)
training['sqft_lot_sqrt'] = training['sqft_lot'].apply(sqrt)
training['bedrooms_square'] = training['bedrooms']*training['bedrooms']
training['floors_square'] = training['floors']*training['floors']

validation['sqft_living_sqrt'] = validation['sqft_living'].apply(sqrt)
validation['sqft_lot_sqrt'] = validation['sqft_lot'].apply(sqrt)
validation['bedrooms_square'] = validation['bedrooms']*validation['bedrooms']
validation['floors_square'] = validation['floors']*validation['floors']

In [46]:
import numpy as np
l1_penalty=np.logspace(1, 7, num=13)
r=[]
for i in l1_penalty:
    model=linear_model.Lasso(alpha=i, normalize=True)
    model.fit(training[all_features],training["price"])
    validation['predict']=model.predict(validation[all_features])
    validation['rss']=(validation['predict']-validation['price'])**2
    RSS=validation['rss'].sum()
    r.append(RSS/10000000000000)
    print(i,': ',RSS,np.count_nonzero(model.coef_) + np.count_nonzero(model.intercept_),"\n")
#model1 = linear_model.Lasso(alpha=l1_penalty, normalize=True)

10.0 :  398213327300134.94 15 

31.622776601683793 :  399041900253346.8 15 

100.0 :  429791604072559.6 11 

316.22776601683796 :  463739831045121.06 6 

1000.0 :  645898733633800.8 4 

3162.2776601683795 :  1222506859427163.0 1 

10000.0 :  1222506859427163.0 1 

31622.776601683792 :  1222506859427163.0 1 

100000.0 :  1222506859427163.0 1 

316227.7660168379 :  1222506859427163.0 1 

1000000.0 :  1222506859427163.0 1 

3162277.6601683795 :  1222506859427163.0 1 

10000000.0 :  1222506859427163.0 1 



In [51]:
from sklearn.metrics import mean_squared_error
for alpha in (l1_penalty):
    print('Alpha', alpha) 
    model_lasso2 = linear_model.Lasso(alpha, normalize=True)
    model2 = model_lasso2.fit(training[all_features], training['price'])
    print('The number of nonzero coefficients:',np.count_nonzero(model2.coef_) + np.count_nonzero(model2.intercept_))
    rss_val = mean_squared_error(validation['price'], model2.predict(validation[all_features]))
    print('RSS for validation data:', rss_val)
    rss_test = mean_squared_error(testing['price'], model2.predict(testing[all_features]))
    print('RSS for testing data:', rss_test)
    print('-------------------------------------')

Alpha 10.0
The number of nonzero coefficients: 15
RSS for validation data: 41329873098.093925
RSS for testing data: 44414705707.12621
-------------------------------------
Alpha 31.622776601683793
The number of nonzero coefficients: 15
RSS for validation data: 41415869253.0718
RSS for testing data: 45005110231.37095
-------------------------------------
Alpha 100.0
The number of nonzero coefficients: 11
RSS for validation data: 44607327874.68185
RSS for testing data: 48272769559.57964
-------------------------------------
Alpha 316.22776601683796
The number of nonzero coefficients: 6
RSS for validation data: 48130755687.09093
RSS for testing data: 51517241147.93143
-------------------------------------
Alpha 1000.0
The number of nonzero coefficients: 4
RSS for validation data: 67036713402.57403
RSS for testing data: 68423119736.932465
-------------------------------------
Alpha 3162.2776601683795
The number of nonzero coefficients: 1
RSS for validation data: 126881874356.73721
RSS for 

In [27]:
r

[39.821332730013495,
 39.90419002533468,
 42.97916040725596,
 46.37398310451211,
 64.58987336338008,
 122.2506859427163,
 122.2506859427163,
 122.2506859427163,
 122.2506859427163,
 122.2506859427163,
 122.2506859427163,
 122.2506859427163,
 122.2506859427163]

In [28]:
model = linear_model.Lasso(alpha=10, normalize=True)

In [31]:
model.fit(training[all_features],training["price"])
testing['predict']=model.predict(testing[all_features])
testing['rss']=(testing['predict']-testing['price'])**2
RSS=testing['rss'].sum()
print('10',': ',RSS,"\n")

10 :  98467402552698.81 



In [32]:
np.count_nonzero(model.coef_) + np.count_nonzero(model.intercept_)

15

In [41]:
l1_penalty = np.logspace(1, 4, num=20)
r=[]
for i in l1_penalty:
    model=linear_model.Lasso(alpha=i, normalize=True)
    model.fit(training[all_features],training["price"])
    validation['predict']=model.predict(validation[all_features])
    validation['rss']=(validation['predict']-validation['price'])**2
    RSS=validation['rss'].sum()
    r.append(RSS/10000000000000)
    print(i,': ',RSS,(np.count_nonzero(model.coef_) + np.count_nonzero(model.intercept_)),"\n")
r

10.0 :  398213327300134.94 15 

14.38449888287663 :  396831833943813.56 15 

20.6913808111479 :  396210901853184.25 15 

29.76351441631318 :  398215534574785.9 15 

42.81332398719393 :  406877258520204.56 13 

61.58482110660264 :  424647490490609.4 12 

88.58667904100822 :  427906308934484.9 11 

127.42749857031335 :  435374677102680.6 10 

183.29807108324357 :  443107216261395.5 7 

263.6650898730358 :  454176669662635.25 6 

379.26901907322497 :  478132980831627.0 6 

545.5594781168514 :  531397181866766.4 6 

784.7599703514607 :  594043306274207.4 5 

1128.8378916846884 :  674059169985784.2 3 

1623.776739188721 :  802609410822920.6 3 

2335.7214690901214 :  1061255252873615.4 2 

3359.818286283781 :  1222506859427163.0 1 

4832.930238571752 :  1222506859427163.0 1 

6951.927961775606 :  1222506859427163.0 1 

10000.0 :  1222506859427163.0 1 



[39.821332730013495,
 39.68318339438136,
 39.62109018531842,
 39.821553457478586,
 40.687725852020456,
 42.464749049060934,
 42.79063089344849,
 43.53746771026806,
 44.31072162613955,
 45.417666966263525,
 47.8132980831627,
 53.13971818667664,
 59.40433062742074,
 67.40591699857842,
 80.26094108229206,
 106.12552528736154,
 122.2506859427163,
 122.2506859427163,
 122.2506859427163,
 122.2506859427163]

In [47]:

model=linear_model.Lasso(alpha=183.29, normalize=True)
model.fit(training[all_features],training["price"])
model.coef_

array([-0.00000000e+00, -0.00000000e+00,  4.85135071e+03,  1.65209585e+02,
        0.00000000e+00, -0.00000000e+00, -0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  4.84787240e+05,  4.14999096e+04,  0.00000000e+00,
        1.13407732e+05,  0.00000000e+00,  0.00000000e+00, -2.41392565e+03,
        0.00000000e+00])

In [57]:
l1_penalty = np.linspace(148.84, 211.36, num=20)
r=[]
for i in l1_penalty:
    model=linear_model.Lasso(alpha=i, normalize=True)
    model.fit(training[all_features],training["price"])
    validation['predict']=model.predict(validation[all_features])
    validation['rss']=(validation['predict']-validation['price'])**2
    RSS=mean_squared_error(validation['price'], model.predict(validation[all_features]))
    r.append(RSS/10000000000000)
    print(i,': ',RSS,(np.count_nonzero(model.coef_) + np.count_nonzero(model.intercept_)),"\n")
r

148.84 :  45578184326.134964 8 

152.13052631578947 :  45624327735.75937 8 

155.42105263157896 :  45663649617.60571 7 

158.71157894736842 :  45698098730.77428 7 

162.0021052631579 :  45733491479.019295 7 

165.29263157894738 :  45770002474.46598 7 

168.58315789473684 :  45807615687.71663 7 

171.87368421052633 :  45846332695.62014 7 

175.1642105263158 :  45886154318.05983 7 

178.45473684210526 :  45927079345.947334 7 

181.74526315789475 :  45969112532.16422 7 

185.03578947368422 :  46012251473.821465 7 

188.3263157894737 :  46056495164.3218 7 

191.61684210526317 :  46102072551.73863 7 

194.90736842105264 :  46148509700.05384 7 

198.1978947368421 :  46196045941.434944 7 

201.4884210526316 :  46244667955.207565 7 

204.7789473684211 :  46294576563.46026 7 

208.06947368421055 :  46338674274.14837 6 

211.36 :  46378826918.406166 6 



[0.004557818432613496,
 0.004562432773575937,
 0.004566364961760571,
 0.004569809873077427,
 0.004573349147901929,
 0.004577000247446598,
 0.004580761568771663,
 0.004584633269562014,
 0.004588615431805983,
 0.004592707934594734,
 0.004596911253216422,
 0.004601225147382147,
 0.00460564951643218,
 0.004610207255173863,
 0.004614850970005384,
 0.004619604594143494,
 0.004624466795520757,
 0.004629457656346026,
 0.004633867427414837,
 0.004637882691840616]

In [58]:
alpha_for_min_rss=155.9
model_lasso5 = linear_model.Lasso(alpha= alpha_for_min_rss, normalize = True)
model5 = model_lasso5.fit(training[all_features], training['price'])
model5_coef = {}
for feat, coef in zip(all_features, model5.coef_):
    model5_coef[feat] = coef
model5_coef

{'bedrooms': -0.0,
 'bedrooms_square': -0.0,
 'bathrooms': 10654.900014638464,
 'sqft_living': 163.36637008940252,
 'sqft_living_sqrt': 0.0,
 'sqft_lot': -0.0,
 'sqft_lot_sqrt': -0.0,
 'floors': 0.0,
 'floors_square': 0.0,
 'waterfront': 506618.3006072249,
 'view': 41963.60323651809,
 'condition': 0.0,
 'grade': 116275.41199983549,
 'sqft_above': 0.0,
 'sqft_basement': 0.0,
 'yr_built': -2613.7578141017707,
 'yr_renovated': 0.0}

<h1>Implementing LASSO using coordinate descent</h1>

In [128]:
dtype_dict = {'bathrooms':float, 'waterfront':int, 'sqft_above':int, 'sqft_living15':float, 'grade':int, 'yr_renovated':int, 'price':float, 'bedrooms':float, 'zipcode':str, 'long':float, 'sqft_lot15':float, 'sqft_living':float, 'floors':float, 'condition':int, 'lat':float, 'date':str, 'sqft_basement':int, 'yr_built':int, 'id':str, 'sqft_lot':int, 'view':int}

In [147]:
def get_numpy_data(data_sframe, features, output):
    data_sframe['constant'] = int(1) # add a constant column to an SFrame
    # prepend variable 'constant' to the features list
    features = ['constant'] + features
    # select the columns of data_SFrame given by the ‘features’ list into the SFrame ‘features_sframe’
    features_sframe=data_sframe[features]
    # this will convert the features_sframe into a numpy matrix:
    features_matrix = np.array(features_sframe)
    # assign the column of data_sframe associated with the target to the variable ‘output_sarray’
    output_sarray=data_sframe[output]
    # this will convert the SArray into a numpy array:
    output_array = np.array(output_sarray)
    return(features_matrix, output_array)

def predict_output(feature_matrix, weights):
    x_arr =np.dot(feature_matrix,weights)
    predictions = x_arr.sum(axis = 1)
    return predictions

<h5>In the house dataset, features vary wildly in their relative magnitude: sqft_living is very large overall compared to bedrooms, for instance. As a result, weight for sqft_living would be much smaller than weight for bedrooms. This is problematic because "small" weights are dropped first as l1_penalty goes up.

To give equal considerations for all features, we need to normalize features as discussed in the lectures: we divide each feature by its 2-norm so that the transformed feature has norm 1.

Let's see how we can do this normalization easily with Numpy: let us first consider a small matrix</h5>

In [130]:
def normalize_features(features_matrix):
    norms=np.linalg.norm(features_matrix,axis=0)    
    normalized_features = features_matrix / norms
    return (normalized_features, norms)

In [131]:
feature, price = get_numpy_data(sales, ['sqft_living', 'bedrooms'], 'price')

In [132]:
normalized_features, norms = normalize_features(feature)

Review of Coordinate Descent
7. We seek to obtain a sparse set of weights by minimizing the LASSO cost function


SUM[ (prediction - output)^2 ] + lambda*( |w[1]| + ... + |w[k]|).

By convention, we do not include w[0] in the L1 penalty term. We never want to push the intercept to zero

normalized_features

<p>Least square </p>

In [133]:
ro = [0, 0, 0]
w = [1,4,1]

prediction = predict_output(normalized_features, w)
for i in range(normalized_features.shape[1]):
    #prediction = predict_output(normalized_features, w)
    #partial derivative of Cost w.r.t  w[i]=-2ro+2w[i]
    error = price - prediction + normalized_features[:,i]*w[i]
    ro[i] = np.dot(normalized_features[:,i],error.transpose())
    print(ro[i])
print(ro)


79400300.01452291
87939470.82325175
80966698.66623946
[79400300.01452291, 87939470.82325175, 80966698.66623946]


In [134]:
normalized_features.shape[1]

3

<h1>4.2 Single Coordinate Descent Step</h1>
Using the formula above, implement coordinate descent that minimizes the cost function over a single feature i. Note that the intercept (weight 0) is not regularized. The function should accept feature matrix, output, current weights, l1 penalty, and index of feature to optimize over. The function should return new weight for feature i.

In [135]:
def zerolistmaker(n):
    listofzeros = [0] * n
    return listofzeros
def lasso_coordinate_descent_step(i,feature_matrix, output, weights, l1_penalty):
    
    prediction = predict_output(feature_matrix,weights)
    error = output - prediction + feature_matrix[:,i]*weights[i]
    ro = np.dot(feature_matrix[:,i],error.transpose())

    if i == 0:
        weights = ro
    elif ro < -l1_penalty/2.:
        weights = ro + l1_penalty/2
    elif ro > l1_penalty/2.:
        weights = ro - l1_penalty/2
    else:
        weights = 0.
    return weights

In [136]:
# should print 0.425558846691
import math
print (lasso_coordinate_descent_step(1, np.array([[3./math.sqrt(13),1./math.sqrt(10)],[2./math.sqrt(13),3./math.sqrt(10)]]), np.array([1., 1.]), np.array([1., 4.]), 0.1))

0.4255588466910251


<h1>4.3 Cyclical coordinate descent</h1>
Now that we have a function that optimizes the cost function over a single coordinate, let us implement cyclical coordinate descent where we optimize coordinates 0, 1, ..., (d-1) in order and repeat.

In [137]:
def lasso_cyclical_coordinate_descent(normalized_features, output, initial_weights, l1_penalty, tolerance):
    
    weights = initial_weights
    ini_weights = initial_weights
    loop = 0
    
    weights_change = zerolistmaker(normalized_features.shape[1])
    
    converged = False

    while not converged:      
        loop+=1                             
        for i in range(len(weights)):
            
            weights_new = lasso_coordinate_descent_step(i,normalized_features, output, weights, l1_penalty)
            weights_change[i] =  np.abs(weights_new - weights[i])
            weights[i] = weights_new        
        max_change = max(weights_change)        
        if max_change < tolerance:
            converged = True
    return weights


feature, price = get_numpy_data(sales, ['sqft_living', 'bedrooms'], 'price')

initial_weights = [0., 0., 0.]
l1_penalty = 1e7
tolerance = 1.0

w = lasso_cyclical_coordinate_descent(normalized_features, price, initial_weights, l1_penalty, tolerance)
print(w)

[21624997.95951909, 63157247.20788956, 0.0]


In [138]:
l1_penalty = 2.0e7
tolerance = 1.0
w = lasso_cyclical_coordinate_descent(normalized_features, price, initial_weights, l1_penalty, tolerance)
print(w)

[49656944.881958194, 32514040.41863478, 0.0]


In [139]:
training = pd.read_csv('kc_house_train_data.csv', dtype = dtype_dict)
testing = pd.read_csv('kc_house_test_data.csv', dtype = dtype_dict)
### Let us consider the following set of features.
more_features = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'waterfront', 
                'view', 'condition', 'grade', 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated']

training

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3.0,1.00,1180.0,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340.0,5650.0
1,6414100192,20141209T000000,538000.0,3.0,2.25,2570.0,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.7210,-122.319,1690.0,7639.0
2,5631500400,20150225T000000,180000.0,2.0,1.00,770.0,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720.0,8062.0
3,2487200875,20141209T000000,604000.0,4.0,3.00,1960.0,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360.0,5000.0
4,1954400510,20150218T000000,510000.0,3.0,2.00,1680.0,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800.0,7503.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17379,7936000429,20150326T000000,1007500.0,4.0,3.50,3510.0,7200,2.0,0,0,...,9,2600,910,2009,0,98136,47.5537,-122.398,2050.0,6200.0
17380,2997800021,20150219T000000,475000.0,3.0,2.50,1310.0,1294,2.0,0,0,...,8,1180,130,2008,0,98116,47.5773,-122.409,1330.0,1265.0
17381,0263000018,20140521T000000,360000.0,3.0,2.50,1530.0,1131,3.0,0,0,...,8,1530,0,2009,0,98103,47.6993,-122.346,1530.0,1509.0
17382,0291310100,20150116T000000,400000.0,3.0,2.50,1600.0,2388,2.0,0,0,...,8,1600,0,2004,0,98027,47.5345,-122.069,1410.0,1287.0


In [140]:
from sklearn import preprocessing


In [141]:
feature1 ,price1 = get_numpy_data(training, more_features, 'price')

[1.000e+00 3.000e+00 2.250e+00 2.570e+03 7.242e+03 2.000e+00 0.000e+00
 0.000e+00 3.000e+00 7.000e+00 2.170e+03 4.000e+02 1.951e+03 1.991e+03]


In [145]:
normalized_features1,norms1 = normalize_features(feature1)
#feature1=np.array(feature1)
#norm1=np.linalg.norm(feature1,axis=0)
#normalized_features2=preprocessing.normalize(feature1,norm='l2')

In [150]:
### l1_penalty=1e7

initial_weights1 = [0.]*(len(more_features))
l1_penalty = 1e7
tolerance = 1.0

weights1e7 = lasso_cyclical_coordinate_descent(normalized_features1, price1, initial_weights1, l1_penalty, tolerance)
weights1e7_dict = {}

for feat, coef in zip(more_features, weights1e7):
    weights1e7_dict[feat] = coef
weights1e7_dict


ValueError: shapes (17384,14) and (13,) not aligned: 14 (dim 1) != 13 (dim 0)