In [28]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import numpy as np

In [29]:
data = np.genfromtxt("Newspaper_Num_Cat.csv", delimiter=',', names=True, 
                    dtype=None, encoding='utf-8-sig')

print(data.dtype)

[('Advert_Spend', '<i4'), ('Price', '<i4'), ('FP_Story', '<U8'), ('Prize_Value', '<i4'), ('Wet', '<i4'), ('Sales', '<i4')]


In [39]:
from sklearn.preprocessing import OneHotEncoder
from numpy.lib import recfunctions
import numpy as np

enc = OneHotEncoder()
fp = data['FP_Story'].reshape(-1,1)
enc.fit(fp)
codedfp = enc.transform(fp).toarray()

ndata = data[['Advert_Spend','Price', 'Prize_Value', 'Wet', 'Sales']]
ndata = recfunctions.structured_to_unstructured(ndata)
ndata = np.insert(ndata, [2], codedfp, axis=1)
print(ndata[0])

[ 1757    60     0     1     0     0     0    30     1 50611]


In [55]:
cols = ndata.shape[1]
X = ndata[:,0:cols-1]
y = ndata[:,cols-1]

print(ndata[0])

[ 1757    60     0     1     0     0     0    30     1 50611]


In [56]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

In [65]:
LReg_clf = LinearRegression().fit(X_train, y_train)
LReg_clf.score(X_train, y_train)

0.9888860206241448

In [69]:
y_preds = LReg_clf.predict(X_test)
test_MAE = metrics.mean_absolute_error(y_preds, y_test)
print("Mean Absolute Error on Test: ", test_MAE)

Mean Absolute Error on Test:  777.0490156976628


In [70]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

In [73]:
ridge_reg = Ridge(alpha = 1.0)
ridge_reg.fit(X_train, y_train)

Ridge()

In [77]:
Ridge_cv_score = cross_val_score(ridge_reg, X_train, y_train, cv=5)

print(Ridge_cv_score)
print(Ridge_cv_score.mean())

[0.99037832 0.98895439 0.98748693 0.98797324 0.98335282]
0.987629141037209


In [78]:
lasso_reg = Lasso(alpha=1.0)
lasso_reg.fit(X_train, y_train)

Lasso()

In [82]:
Lasso_cv_score = cross_val_score(lasso_reg, X_train, y_train, cv=5)

print(Lasso_cv_score)
print(Lasso_cv_score.mean())

[0.99051792 0.98889431 0.9875584  0.98815003 0.98343084]
0.9877103023547361


In [85]:
cross_val_score(lasso_reg, X_train, y_train, cv=5)

array([0.99051792, 0.98889431, 0.9875584 , 0.98815003, 0.98343084])

In [89]:
for alpha in [1, 10, 100, 1000]:
    lasso_reg = Lasso(alpha=alpha)
    lasso_reg.fit(X_train, y_train)
    print("Alpha=", alpha , "CV Scores:", cross_val_score(lasso_reg, X_train, y_train, cv=5))

Alpha= 1 CV Scores: [0.99051792 0.98889431 0.9875584  0.98815003 0.98343084]
Alpha= 10 CV Scores: [0.99055179 0.98895722 0.9874845  0.98797481 0.98346183]
Alpha= 100 CV Scores: [0.98788002 0.98632641 0.9844434  0.98375961 0.97935309]
Alpha= 1000 CV Scores: [0.82842891 0.78879969 0.854549   0.82397041 0.72053177]


In [97]:
y_pred_lasso=lasso_reg.predict(X_test)

print(y_pred_lasso)

[54333.08413473 73455.01957736 59091.13905077 56652.77561294
 72522.62582093 53651.81162064 54263.73726887 72627.16975012
 62346.80538033 58467.68428851 58135.1424273  67236.33924501
 51310.49347688 66569.40021515 56119.20248341 58092.58273265
 57445.26105111 63667.2087759  50981.56582634 61332.00882651
 61288.37054912 62786.61245268 62223.2351735  32640.72238936
 61656.6419459  58546.93589747 56590.5409644  65106.30393089
 63374.10706515 73062.00197083 49423.77891194 53890.49102045
 63585.11968099 61231.75942847 51825.7522935  61096.08235288
 49928.10584677 67729.14322735 47893.32527925 59035.94001512
 63396.06723297 56850.75062554 66369.76270355 44489.83719958
 67429.15991348 51623.46294996 50564.39924229 51583.58673148
 70092.75504118 58456.25774025 60582.30038125 51866.15482269
 53976.82320248 48696.52530097 70878.57148448 70071.17333583
 63597.13014553 65293.99573794 72191.16254245 51674.84880497
 63821.57862686 67370.84133364 60082.64644004 46918.25093413
 60492.70080676 58356.40