In [1]:
import pandas as pd
import numpy as np

In [2]:
train_df = pd.read_csv(r'C:\Users\shash\Data\loan_data_train.csv')
test_df = pd.read_csv(r'C:\Users\shash\Data\loan_data_test.csv')

In [3]:
train_df.head()

Unnamed: 0,ID,Amount.Requested,Amount.Funded.By.Investors,Interest.Rate,Loan.Length,Loan.Purpose,Debt.To.Income.Ratio,State,Home.Ownership,Monthly.Income,FICO.Range,Open.CREDIT.Lines,Revolving.CREDIT.Balance,Inquiries.in.the.Last.6.Months,Employment.Length
0,79542.0,25000,25000.0,18.49%,60 months,debt_consolidation,27.56%,VA,MORTGAGE,8606.56,720-724,11,15210,3.0,5 years
1,75473.0,19750,19750.0,17.27%,60 months,debt_consolidation,13.39%,NY,MORTGAGE,6737.5,710-714,14,19070,3.0,4 years
2,67265.0,2100,2100.0,14.33%,36 months,major_purchase,3.50%,LA,OWN,1000.0,690-694,13,893,1.0,< 1 year
3,80167.0,28000,28000.0,16.29%,36 months,credit_card,19.62%,NV,MORTGAGE,7083.33,710-714,12,38194,1.0,10+ years
4,17240.0,24250,17431.82,12.23%,60 months,credit_card,23.79%,OH,MORTGAGE,5833.33,730-734,6,31061,2.0,10+ years


In [4]:
test_df.head()

Unnamed: 0,ID,Amount.Requested,Amount.Funded.By.Investors,Loan.Length,Loan.Purpose,Debt.To.Income.Ratio,State,Home.Ownership,Monthly.Income,FICO.Range,Open.CREDIT.Lines,Revolving.CREDIT.Balance,Inquiries.in.the.Last.6.Months,Employment.Length
0,20093,5000,5000,60 months,moving,12.59%,NY,RENT,4416.67,690-694,13,7686,0,< 1 year
1,62445,18000,18000,60 months,debt_consolidation,4.93%,CA,RENT,5258.5,710-714,6,11596,0,10+ years
2,65248,7200,7200,60 months,debt_consolidation,25.16%,LA,MORTGAGE,3750.0,750-754,13,7283,0,6 years
3,81822,7200,7200,36 months,debt_consolidation,17.27%,NY,MORTGAGE,3416.67,790-794,14,4838,0,10+ years
4,57923,22000,22000,60 months,debt_consolidation,18.28%,MI,MORTGAGE,6083.33,720-724,9,20181,0,8 years


In [5]:
# Combining test and training data for data preparation
test_df['Interest.Rate']=np.nan
train_df['data']='train'
test_df['data']='test'
test_df = test_df[train_df.columns]
final_df = pd.concat([train_df, test_df],axis=0)

In [6]:
final_df.head()

Unnamed: 0,ID,Amount.Requested,Amount.Funded.By.Investors,Interest.Rate,Loan.Length,Loan.Purpose,Debt.To.Income.Ratio,State,Home.Ownership,Monthly.Income,FICO.Range,Open.CREDIT.Lines,Revolving.CREDIT.Balance,Inquiries.in.the.Last.6.Months,Employment.Length,data
0,79542.0,25000,25000.0,18.49%,60 months,debt_consolidation,27.56%,VA,MORTGAGE,8606.56,720-724,11,15210,3.0,5 years,train
1,75473.0,19750,19750.0,17.27%,60 months,debt_consolidation,13.39%,NY,MORTGAGE,6737.5,710-714,14,19070,3.0,4 years,train
2,67265.0,2100,2100.0,14.33%,36 months,major_purchase,3.50%,LA,OWN,1000.0,690-694,13,893,1.0,< 1 year,train
3,80167.0,28000,28000.0,16.29%,36 months,credit_card,19.62%,NV,MORTGAGE,7083.33,710-714,12,38194,1.0,10+ years,train
4,17240.0,24250,17431.82,12.23%,60 months,credit_card,23.79%,OH,MORTGAGE,5833.33,730-734,6,31061,2.0,10+ years,train


In [7]:
final_df.dtypes

ID                                float64
Amount.Requested                   object
Amount.Funded.By.Investors         object
Interest.Rate                      object
Loan.Length                        object
Loan.Purpose                       object
Debt.To.Income.Ratio               object
State                              object
Home.Ownership                     object
Monthly.Income                    float64
FICO.Range                         object
Open.CREDIT.Lines                  object
Revolving.CREDIT.Balance           object
Inquiries.in.the.Last.6.Months    float64
Employment.Length                  object
data                               object
dtype: object

 Dropping cells ID and Amount.Funded.By.Investors as they are not valuable for the model.
 Cleaning Interest rate and debt to income by removing % and keeping the data type as numeric.
 Changing data type to numeric for : Amount.Requested , Open.CREDIT.Lines, Revolving.CREDIT.Balance 
 FICO.Range has interval value, will be keeping the average of intervals.
 Creating dummies for categorical variables Loan length, loan purpose, state and home ownership.

In [8]:
final_df.drop(['ID', 'Amount.Funded.By.Investors'], axis = 1, inplace = True)

In [9]:
for col in ['Interest.Rate','Debt.To.Income.Ratio']:
    final_df[col]=final_df[col].str.replace("%","")

In [10]:
for col in ['Amount.Requested', 'Interest.Rate','Debt.To.Income.Ratio',
            'Open.CREDIT.Lines','Revolving.CREDIT.Balance']:
    final_df[col]=pd.to_numeric(final_df[col],errors='coerce')

In [11]:
k=final_df['FICO.Range'].str.split("-",expand=True).astype(float)

final_df['FICO.Score']=0.5*(k[0]+k[1])

del final_df['FICO.Range']

final_df.head()

Unnamed: 0,Amount.Requested,Interest.Rate,Loan.Length,Loan.Purpose,Debt.To.Income.Ratio,State,Home.Ownership,Monthly.Income,Open.CREDIT.Lines,Revolving.CREDIT.Balance,Inquiries.in.the.Last.6.Months,Employment.Length,data,FICO.Score
0,25000.0,18.49,60 months,debt_consolidation,27.56,VA,MORTGAGE,8606.56,11.0,15210.0,3.0,5 years,train,722.0
1,19750.0,17.27,60 months,debt_consolidation,13.39,NY,MORTGAGE,6737.5,14.0,19070.0,3.0,4 years,train,712.0
2,2100.0,14.33,36 months,major_purchase,3.5,LA,OWN,1000.0,13.0,893.0,1.0,< 1 year,train,692.0
3,28000.0,16.29,36 months,credit_card,19.62,NV,MORTGAGE,7083.33,12.0,38194.0,1.0,10+ years,train,712.0
4,24250.0,12.23,60 months,credit_card,23.79,OH,MORTGAGE,5833.33,6.0,31061.0,2.0,10+ years,train,732.0


In [12]:
final_df['Employment.Length'].value_counts()

10+ years    653
< 1 year     249
2 years      243
3 years      235
5 years      202
4 years      191
1 year       177
6 years      163
7 years      127
8 years      108
9 years       72
.              2
Name: Employment.Length, dtype: int64

In [13]:
final_df['Employment.Length']=final_df['Employment.Length'].str.replace('years',"")

final_df['Employment.Length']=final_df['Employment.Length'].str.replace('year',"")

final_df['Employment.Length']=np.where(final_df['Employment.Length'].str[:2]=="10",10,final_df['Employment.Length'])

final_df['Employment.Length']=np.where(final_df['Employment.Length'].str[0]=="<",0,final_df['Employment.Length'])

final_df['Employment.Length']=pd.to_numeric(final_df['Employment.Length'],errors='coerce')

In [14]:
cat_col=final_df.select_dtypes(['object']).columns
cat_col

Index(['Loan.Length', 'Loan.Purpose', 'State', 'Home.Ownership', 'data'], dtype='object')

In [15]:
cat_col=cat_col[:-1]
cat_col

Index(['Loan.Length', 'Loan.Purpose', 'State', 'Home.Ownership'], dtype='object')

In [16]:
for col in cat_col:
    freqs=final_df[col].value_counts()
    k=freqs.index[freqs>20][:-1]
    for cat in k:
        name=col+'_'+cat
        final_df[name]=(final_df[col]==cat).astype(int)
    del final_df[col]
    print(col)

Loan.Length
Loan.Purpose
State
Home.Ownership


In [17]:
final_df.shape

(2500, 51)

In [18]:
final_df.isnull().sum()

Amount.Requested                     5
Interest.Rate                      300
Debt.To.Income.Ratio                 1
Monthly.Income                       3
Open.CREDIT.Lines                    9
Revolving.CREDIT.Balance             5
Inquiries.in.the.Last.6.Months       3
Employment.Length                   80
data                                 0
FICO.Score                           0
Loan.Length_36 months                0
Loan.Purpose_debt_consolidation      0
Loan.Purpose_credit_card             0
Loan.Purpose_other                   0
Loan.Purpose_home_improvement        0
Loan.Purpose_major_purchase          0
Loan.Purpose_small_business          0
Loan.Purpose_car                     0
Loan.Purpose_wedding                 0
Loan.Purpose_medical                 0
Loan.Purpose_moving                  0
State_CA                             0
State_NY                             0
State_TX                             0
State_FL                             0
State_IL                 

In [19]:
for col in final_df.columns:
    if (col not in ['Interest.Rate','data'])& (final_df[col].isnull().sum()>0):
        final_df.loc[final_df[col].isnull(),col]=final_df.loc[final_df['data']=='train',col].mean()
        
final_df.isnull().sum()

Amount.Requested                     0
Interest.Rate                      300
Debt.To.Income.Ratio                 0
Monthly.Income                       0
Open.CREDIT.Lines                    0
Revolving.CREDIT.Balance             0
Inquiries.in.the.Last.6.Months       0
Employment.Length                    0
data                                 0
FICO.Score                           0
Loan.Length_36 months                0
Loan.Purpose_debt_consolidation      0
Loan.Purpose_credit_card             0
Loan.Purpose_other                   0
Loan.Purpose_home_improvement        0
Loan.Purpose_major_purchase          0
Loan.Purpose_small_business          0
Loan.Purpose_car                     0
Loan.Purpose_wedding                 0
Loan.Purpose_medical                 0
Loan.Purpose_moving                  0
State_CA                             0
State_NY                             0
State_TX                             0
State_FL                             0
State_IL                 

In [20]:
#Splitting back the train and test data
train_df=final_df[final_df['data']=='train']
del train_df['data']
test_df=final_df[final_df['data']=='test']
test_df.drop(['Interest.Rate','data'],axis=1,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [21]:
del final_df

In [22]:
from sklearn.model_selection import train_test_split

In [23]:
train1,train2=train_test_split(train_df,test_size=0.2,random_state=2)

In [24]:
x_train =train1.drop('Interest.Rate',axis=1)
y_train =train1['Interest.Rate']

# Linear Regression

In [25]:
from sklearn.metrics import r2_score

In [26]:
from sklearn.linear_model import LinearRegression

In [27]:
lr=LinearRegression()

In [28]:
lr.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [29]:
x_train.shape

(1760, 49)

In [30]:
lr.intercept_

75.9212141359478

In [31]:
list(zip(x_train.columns,lr.coef_))

[('Amount.Requested', 0.00015602405153877186),
 ('Debt.To.Income.Ratio', -0.003938504173777158),
 ('Monthly.Income', -2.6568573569170736e-05),
 ('Open.CREDIT.Lines', -0.039922608340240945),
 ('Revolving.CREDIT.Balance', -3.923647858838919e-06),
 ('Inquiries.in.the.Last.6.Months', 0.3361172111313311),
 ('Employment.Length', 0.0349936718945103),
 ('FICO.Score', -0.08667701121950822),
 ('Loan.Length_36 months', -3.143747246950592),
 ('Loan.Purpose_debt_consolidation', -0.46739356903559237),
 ('Loan.Purpose_credit_card', -0.6069873604061792),
 ('Loan.Purpose_other', 0.44417142270192733),
 ('Loan.Purpose_home_improvement', -0.3611899849181419),
 ('Loan.Purpose_major_purchase', -0.09589524932665638),
 ('Loan.Purpose_small_business', 0.06800548772823811),
 ('Loan.Purpose_car', 0.025259628036013893),
 ('Loan.Purpose_wedding', -0.779154265005644),
 ('Loan.Purpose_medical', -0.4281152109954953),
 ('Loan.Purpose_moving', 1.2845276544595716),
 ('State_CA', -0.21159715256759293),
 ('State_NY', -0.1

In [32]:
x_train2=train2.drop('Interest.Rate',axis=1)

In [33]:
predicted_value = lr.predict(x_train2)

In [34]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score

In [35]:
mean_absolute_error(train2['Interest.Rate'],predicted_value)

1.6278267948887137

In [36]:
#carrying out the actual training and validation on test and train data
x_train=train_df.drop('Interest.Rate',axis=1)
y_train=train_df['Interest.Rate']

In [37]:
lr.fit(x_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [38]:
testDataPred_LinearReg = lr.predict(test_df)

In [39]:
r2_score(train2['Interest.Rate'],predicted_value)

0.7587856682638678

In [40]:
#writing the prediction to a file
pd.DataFrame(testDataPred_LinearReg).to_csv("Output_LinearReg.csv",index=False)

# Ridge Regression

In [41]:
from sklearn.linear_model import Ridge,Lasso
from sklearn.model_selection import GridSearchCV

In [42]:
lambdas=np.linspace(1,100,100)

In [43]:
ridge_param={'alpha':lambdas}

In [44]:
ridge_model=Ridge(fit_intercept=True)

In [45]:
grid_search=GridSearchCV(ridge_model,param_grid=ridge_param,cv=10,scoring='neg_mean_absolute_error')

In [46]:
grid_search.fit(x_train,y_train)

GridSearchCV(cv=10, error_score='raise-deprecating',
             estimator=Ridge(alpha=1.0, copy_X=True, fit_intercept=True,
                             max_iter=None, normalize=False, random_state=None,
                             solver='auto', tol=0.001),
             iid='warn', n_jobs=None,
             param_grid={'alpha': array([  1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,
        12.,  13.,  14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,  22.,
        23.,  24.,  25.,  26.,  27.,  28.,  29.,  30.,  31.,  32.,...
        34.,  35.,  36.,  37.,  38.,  39.,  40.,  41.,  42.,  43.,  44.,
        45.,  46.,  47.,  48.,  49.,  50.,  51.,  52.,  53.,  54.,  55.,
        56.,  57.,  58.,  59.,  60.,  61.,  62.,  63.,  64.,  65.,  66.,
        67.,  68.,  69.,  70.,  71.,  72.,  73.,  74.,  75.,  76.,  77.,
        78.,  79.,  80.,  81.,  82.,  83.,  84.,  85.,  86.,  87.,  88.,
        89.,  90.,  91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.,  99.,
       100.]

In [47]:
x_train2=train2.drop('Interest.Rate',axis=1)
predicted_value_RidgeReg = grid_search.predict(x_train2)
r2_score(train2['Interest.Rate'],predicted_value_RidgeReg)

0.7688453116561021

In [48]:
grid_search.best_estimator_

Ridge(alpha=34.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [49]:
grid_search.cv_results_

{'mean_fit_time': array([0.00956259, 0.00848281, 0.00895135, 0.00853801, 0.00847137,
        0.00941191, 0.00810595, 0.00898678, 0.00985782, 0.01015432,
        0.00994947, 0.00982044, 0.00710421, 0.0065892 , 0.00845342,
        0.00841804, 0.00887673, 0.00767105, 0.00706587, 0.00680032,
        0.00731699, 0.00776362, 0.00775449, 0.00778294, 0.00729871,
        0.00719624, 0.00573542, 0.00768809, 0.00736744, 0.00875933,
        0.00759923, 0.00715885, 0.00775683, 0.00851257, 0.00813015,
        0.0072027 , 0.00717127, 0.00843816, 0.00810354, 0.00753825,
        0.0111685 , 0.0099488 , 0.01135056, 0.01125121, 0.00953126,
        0.01020701, 0.01456277, 0.01220486, 0.01062956, 0.01085615,
        0.01055849, 0.01129856, 0.01205754, 0.01096065, 0.01032207,
        0.0113143 , 0.01177542, 0.00945294, 0.00773134, 0.00902932,
        0.00883541, 0.00773273, 0.00860231, 0.00774972, 0.00734921,
        0.00757101, 0.00756807, 0.00763881, 0.00629251, 0.00840948,
        0.00756485, 0.0085063 ,

Using the report function given below you can see the cv performance of top few models as well, that with the tentative performance

In [50]:
def report(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                  results['mean_test_score'][candidate],
                  results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")

In [51]:
report(grid_search.cv_results_,100)

Model with rank: 1
Mean validation score: -1.611 (std: 0.117)
Parameters: {'alpha': 34.0}

Model with rank: 2
Mean validation score: -1.611 (std: 0.117)
Parameters: {'alpha': 35.0}

Model with rank: 3
Mean validation score: -1.611 (std: 0.117)
Parameters: {'alpha': 33.0}

Model with rank: 4
Mean validation score: -1.611 (std: 0.117)
Parameters: {'alpha': 36.0}

Model with rank: 5
Mean validation score: -1.611 (std: 0.117)
Parameters: {'alpha': 32.0}

Model with rank: 6
Mean validation score: -1.611 (std: 0.117)
Parameters: {'alpha': 37.0}

Model with rank: 7
Mean validation score: -1.611 (std: 0.117)
Parameters: {'alpha': 38.0}

Model with rank: 8
Mean validation score: -1.611 (std: 0.116)
Parameters: {'alpha': 31.0}

Model with rank: 9
Mean validation score: -1.611 (std: 0.117)
Parameters: {'alpha': 39.0}

Model with rank: 10
Mean validation score: -1.611 (std: 0.116)
Parameters: {'alpha': 30.0}

Model with rank: 11
Mean validation score: -1.611 (std: 0.117)
Parameters: {'alpha': 40.0

In [52]:
testDataPred_RidgeReg=grid_search.predict(test_df)

In [53]:
pd.DataFrame(testDataPred_RidgeReg).to_csv("Output_RidgeReg.csv",index=False)

# Lasso Regression

In [54]:
lambdas=np.linspace(1,10,100)
lasso_model=Lasso(fit_intercept=True)
lasso_param={'alpha':lambdas}

In [55]:
grid_search=GridSearchCV(lasso_model,param_grid=lasso_param,cv=10,scoring='neg_mean_absolute_error')

In [56]:
grid_search.fit(x_train,y_train)

GridSearchCV(cv=10, error_score='raise-deprecating',
             estimator=Lasso(alpha=1.0, copy_X=True, fit_intercept=True,
                             max_iter=1000, normalize=False, positive=False,
                             precompute=False, random_state=None,
                             selection='cyclic', tol=0.0001, warm_start=False),
             iid='warn', n_jobs=None,
             param_grid={'alpha': array([ 1.        ,  1.09090909,  1.18181818,  1.27272727,  1.36363636,
        1.45454545,  1.5454...
        7.81818182,  7.90909091,  8.        ,  8.09090909,  8.18181818,
        8.27272727,  8.36363636,  8.45454545,  8.54545455,  8.63636364,
        8.72727273,  8.81818182,  8.90909091,  9.        ,  9.09090909,
        9.18181818,  9.27272727,  9.36363636,  9.45454545,  9.54545455,
        9.63636364,  9.72727273,  9.81818182,  9.90909091, 10.        ])},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_absolu

In [57]:
grid_search.best_estimator_

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [58]:
lambdas=np.linspace(.001,2,100)
lasso_param={'alpha':lambdas}

In [59]:
grid_search=GridSearchCV(lasso_model,param_grid=lasso_param,cv=10,scoring='neg_mean_absolute_error')
grid_search.fit(x_train,y_train)

GridSearchCV(cv=10, error_score='raise-deprecating',
             estimator=Lasso(alpha=1.0, copy_X=True, fit_intercept=True,
                             max_iter=1000, normalize=False, positive=False,
                             precompute=False, random_state=None,
                             selection='cyclic', tol=0.0001, warm_start=False),
             iid='warn', n_jobs=None,
             param_grid={'alpha': array([1.00000000e-03, 2.11919192e-02, 4.13838384e-02, 6.15757576e-02,
       8.17...
       1.61635354e+00, 1.63654545e+00, 1.65673737e+00, 1.67692929e+00,
       1.69712121e+00, 1.71731313e+00, 1.73750505e+00, 1.75769697e+00,
       1.77788889e+00, 1.79808081e+00, 1.81827273e+00, 1.83846465e+00,
       1.85865657e+00, 1.87884848e+00, 1.89904040e+00, 1.91923232e+00,
       1.93942424e+00, 1.95961616e+00, 1.97980808e+00, 2.00000000e+00])},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_absolute_error', verbose=0)

In [60]:
grid_search.best_estimator_

Lasso(alpha=0.021191919191919192, copy_X=True, fit_intercept=True,
      max_iter=1000, normalize=False, positive=False, precompute=False,
      random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

In [61]:
report(grid_search.cv_results_,5)

Model with rank: 1
Mean validation score: -1.603 (std: 0.120)
Parameters: {'alpha': 0.021191919191919192}

Model with rank: 2
Mean validation score: -1.612 (std: 0.126)
Parameters: {'alpha': 0.041383838383838384}

Model with rank: 3
Mean validation score: -1.617 (std: 0.115)
Parameters: {'alpha': 0.001}

Model with rank: 4
Mean validation score: -1.619 (std: 0.127)
Parameters: {'alpha': 0.061575757575757575}

Model with rank: 5
Mean validation score: -1.624 (std: 0.128)
Parameters: {'alpha': 0.08176767676767677}



In [62]:
lasso_model=grid_search.best_estimator_

In [63]:
lasso_model.fit(x_train,y_train)

Lasso(alpha=0.021191919191919192, copy_X=True, fit_intercept=True,
      max_iter=1000, normalize=False, positive=False, precompute=False,
      random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

In [64]:
x_train2=train2.drop('Interest.Rate',axis=1)
predicted_value_LassoReg = lasso_model.predict(x_train2)
r2_score(train2['Interest.Rate'],predicted_value_LassoReg)

0.7683529322767257

In [65]:
list(zip(x_train.columns,lasso_model.coef_))

[('Amount.Requested', 0.00016025576388069374),
 ('Debt.To.Income.Ratio', -0.0009940455335086431),
 ('Monthly.Income', -2.7161150694601474e-05),
 ('Open.CREDIT.Lines', -0.03776066942268258),
 ('Revolving.CREDIT.Balance', -3.2291582902347866e-06),
 ('Inquiries.in.the.Last.6.Months', 0.32734647665538585),
 ('Employment.Length', 0.011883001638602755),
 ('FICO.Score', -0.08647817945138424),
 ('Loan.Length_36 months', -3.0098932744846625),
 ('Loan.Purpose_debt_consolidation', -0.1780182230425123),
 ('Loan.Purpose_credit_card', -0.22053269213383622),
 ('Loan.Purpose_other', 0.3196217853383159),
 ('Loan.Purpose_home_improvement', -0.0),
 ('Loan.Purpose_major_purchase', 0.0),
 ('Loan.Purpose_small_business', 0.0),
 ('Loan.Purpose_car', 0.0),
 ('Loan.Purpose_wedding', -0.0),
 ('Loan.Purpose_medical', -0.0),
 ('Loan.Purpose_moving', 0.0),
 ('State_CA', -0.0),
 ('State_NY', 0.0),
 ('State_TX', 0.2592516325019152),
 ('State_FL', 0.0),
 ('State_IL', -0.0),
 ('State_GA', -0.0),
 ('State_PA', -0.0),
 

In [66]:
testDataPred_LassoReg=grid_search.predict(test_df)

In [67]:
pd.DataFrame(testDataPred_LassoReg).to_csv("Output_LassoReg.csv",index=False)