In [2]:
# Model prediction using Lasso

# Import all necessary packages
import pandas as pd
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [3]:
# Read the three competition CSV files from the working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
sample = pd.read_csv("sample_submission.csv")

# Detach the label column; `pop` also removes it from `train`, leaving only
# the feature columns behind.
target = train.pop("TARGET_5Yrs")

# Fit a standard scaler on the training features and display the scaled array.
# NOTE(review): the scaled array is only shown as the cell output and is never
# assigned, so `train` itself stays unscaled for every later cell that uses it.
# Confirm whether training on raw features is intended.
scaler = StandardScaler()
scaler.fit_transform(train)

array([[ 0.93005882, -1.73183431,  1.00610018, ...,  1.1072419 ,
        -0.05507101,  0.47321012],
       [-0.38091406, -1.7314013 ,  0.71400493, ..., -0.36478721,
         0.43214835,  0.1966711 ],
       [-0.28687803, -1.73096829,  1.29819543, ..., -0.6101254 ,
        -0.05507101, -0.90948499],
       ...,
       [-0.33188458,  1.73096829,  1.29819543, ...,  1.35258008,
        -0.05507101,  0.74974914],
       [-1.39921865,  1.7314013 , -1.38908087, ..., -0.85546358,
         0.06673383, -1.0477545 ],
       [-0.99491401,  1.73183431, -0.80489037, ...,  0.61656553,
        -0.66409522,  0.1966711 ]])

In [4]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    train,
    target,
    test_size=0.2,
    random_state=8,
)

In [5]:
# Create the Lasso regressor `reg`, allowing at most 1000 iterations
reg = Lasso(max_iter=1000)

# Train on the training split; the fitted estimator is the cell's displayed value
reg.fit(X=X_train, y=y_train)

Lasso()

In [6]:
# Predict on the training and validation splits with the fitted model
y_train_preds, y_val_preds = (
    reg.predict(features) for features in (X_train, X_val)
)

In [7]:
# RMSE and MAE scores for this model on the training set and validation set.
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, where it raises a
# TypeError. Taking the square root explicitly works on every version and
# prints the same text.
for split_name, y_true, y_pred in (
    ("Train", y_train, y_train_preds),
    ("Val", y_val, y_val_preds),
):
    print(split_name + " RMSE - " + str(np.sqrt(mse(y_true, y_pred))))
    print(split_name + " MAE - " + str(mae(y_true, y_pred)))

Train RMSE - 0.36695727636332515
Train MAE - 0.2731470179649823
Val RMSE - 0.3613339428307061
Val MAE - 0.2690114432300281


In [8]:
# Prepare the submission: predict on the test frame with the fitted model
y_test_preds = reg.predict(X=test)

In [9]:
# Check whether any prediction falls outside the [0, 1] range:
# first the values above 1, then the values below 0.
above_one = y_test_preds > 1
below_zero = y_test_preds < 0
print(list(y_test_preds[above_one]))
print(list(y_test_preds[below_zero]))

[]
[]


In [10]:
# Pair each test Id with its prediction and write the submission file
# (no index column in the CSV).
submission = test[['Id']].assign(TARGET_5Yrs=y_test_preds)
submission.to_csv('submission_week_1_7_v1.csv', index=False)

In [11]:
from joblib import dump

# Persist the fitted model to disk; `dump` returns the list of files written,
# which is shown as the cell output.
dump(value=reg, filename='lasso_base.joblib')

['lasso_base.joblib']