# Ad targeting using LASSO Regression
## By Cathy Robison


In [1]:
import pandas as pd
from sklearn.linear_model import LassoLarsCV
from sklearn.metrics import mean_squared_error

# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# the old sklearn.cross_validation module was removed in 0.20.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # very old scikit-learn releases only ship the old module
    from sklearn.cross_validation import train_test_split

# Load the data set (the CSV can be found in the GitHub repo)
fr = pd.read_csv('finalmaster-ratios.csv')

# Candidate predictor names: every column except the first 8, which are not
# valid predictors.
# NOTE(review): the offset is positional -- confirm that the first 8 columns
# (and in particular the target '# Purchases') are still the ones to exclude
# if the CSV layout ever changes.
allvariablenames = list(fr.columns.values)[8:]

# Predictor variables for the model
predictors = fr[allvariablenames]
# Target variable for the model
target = fr['# Purchases']

# Split data into train and test sets, with 30% held out for testing.
# random_state is fixed so the split is reproducible.
pred_train, pred_test, tar_train, tar_test = train_test_split(
    predictors, target, test_size=.3, random_state=123)

# Fit the LASSO (LARS) model on the training set, choosing the penalty
# strength alpha by 10-fold cross-validation.
model = LassoLarsCV(cv=10).fit(pred_train, tar_train)

# A notebook cell only displays its *last* expression, so the score and the
# selected alpha have to be printed explicitly or they are silently dropped.
print('test data R-square: {}'.format(model.score(pred_test, tar_test)))
print('selected alpha: {}'.format(model.alpha_))

# Predicted number of purchases for every row of the test set
model.predict(pred_test)




array([ 2.96771770e+01,  4.51311572e+01,  1.78484292e+01,  2.23049285e+01,
        2.94469159e+02,  1.59419320e+02,  8.53164946e+01,  5.51913659e+01,
        5.49415258e+01,  5.22248596e+01,  3.42277009e+00,  7.06451319e+01,
        4.13494044e+01,  2.70528233e+01,  2.03673566e+01,  2.78720261e+01,
        4.41767012e+00, -9.73056149e-02,  3.39776155e+01,  1.53094743e+02,
        3.43749745e+01,  4.88973597e+01,  9.82775148e+01,  5.08856861e+01,
        3.74342370e+01,  9.69137711e+01,  3.66071504e+01,  3.81637800e+01,
       -2.55196463e+01,  2.99797769e+01, -8.94455451e+00,  3.73219091e+01,
        1.95168230e+01, -2.12081278e+00, -9.57640609e+00,  1.58633857e+01,
        9.32573158e+01,  1.19575175e+02, -4.57405018e+00,  1.13298143e+01,
        4.32111706e+01,  1.19528253e+01,  3.36419901e+01, -1.74604294e+01,
        5.50969306e+01,  8.82102631e+01,  5.30136213e+01,  3.82724559e+01,
        3.96480503e+01,  5.87004457e+01,  2.76737088e+01,  1.65452045e+02,
       -1.04845702e+01,  

In [2]:
# List the predictors that the fitted LASSO model gave a positive coefficient.
# One row per predictor: its column name ('label') and its coefficient ('coeff').
predictors_model = pd.DataFrame(
    {'label': allvariablenames, 'coeff': model.coef_},
    columns=['label', 'coeff'],
)

# Keep only the rows whose coefficient is strictly greater than 0, then print
# each one as a [label coefficient] pair.
# NOTE(review): LASSO can also retain predictors with *negative* coefficients;
# the `> 0` filter hides those -- confirm that only positive drivers are wanted.
positive_rows = predictors_model[predictors_model['coeff'] > 0]
for label_and_coeff in positive_rows.values:
    print(label_and_coeff)
        

['B01001036' 2.784856986420159]
['B01001037' 0.9234930857404328]
['B01001038' 0.9491459380764333]
['B02001005' 0.3919782979991068]
['B13014026' 0.22147975335090184]
['B13014027' 0.05121418112617723]
['B19001017' 1.6058830181449382]


In [3]:
# Mean squared error on the training split (the rows the model was fitted on)
train_predictions = model.predict(pred_train)
train_error = mean_squared_error(tar_train, train_predictions)
print('training data MSE')
print(train_error)  # 22525.64 in the recorded run

# Mean squared error on the held-out test split
test_predictions = model.predict(pred_test)
test_error = mean_squared_error(tar_test, test_predictions)
print('Test data MSE')
print(test_error)  # 41573.80 in the recorded run


training data MSE
22525.63625144556
Test data MSE
41573.80112905681


In [4]:
# Coefficient of determination (R^2) of the fitted model on the training split
rsquared_train = model.score(X=pred_train, y=tar_train)
print('training data R-square')
print(rsquared_train)  # 0.22 in the recorded run

training data R-square
0.2227648778602469


In [5]:
# Coefficient of determination (R^2) of the fitted model on the held-out test split
rsquared_test = model.score(X=pred_test, y=tar_test)
print('test data R-square')
print(rsquared_test)  # 0.17 in the recorded run

test data R-square
0.1753817900469531


In [6]:
# Intercept term of the fitted LASSO model
y_intercept = model.intercept_
print('y intercept')
print(y_intercept)

y intercept
2.8174754145509553
