In [1]:
##Python libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [2]:
##Import Training Dataset
loans_train_df = pd.read_csv('./outputs/cleaned_loans_train.csv')
loans_train_ada_df = pd.read_csv('./outputs/cleaned_loans_train.csv')
loans_train_df

Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length,PERSON_HOME_OWNERSHIP_MORTGAGE,PERSON_HOME_OWNERSHIP_OTHER,...,LOAN_GRADE_C,LOAN_GRADE_D,LOAN_GRADE_E,LOAN_GRADE_F,LOAN_GRADE_G,CB_PERSON_CRED_HIST_LENGTH_11_17,CB_PERSON_CRED_HIST_LENGTH_18_above,CB_PERSON_CRED_HIST_LENGTH_5_10,CB_PERSON_CRED_HIST_LENGTH_5_below,loan_status
0,1.569797,-1.081318,0.000000,-0.578305,0.516442,0.117380,0,0.411879,0,0,...,0,0,0,0,0,1,0,0,0,0
1,-0.921741,-0.052550,0.520621,-0.937769,0.619568,-0.973222,0,0.491800,0,0,...,1,0,0,0,0,0,0,0,1,0
2,0.240977,-1.508084,0.587859,-0.578305,0.340882,0.553620,0,0.377667,0,0,...,0,0,0,0,0,0,0,1,0,0
3,0.407079,0.435878,0.724529,0.500086,0.493327,0.117380,0,0.528549,0,0,...,0,0,0,0,0,0,0,1,0,0
4,-0.921741,0.098465,0.293930,-0.578305,0.167927,-0.646041,0,0.503242,0,0,...,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101340,-0.622574,-0.580418,0.346125,-0.506511,0.742300,-0.188108,0,0.451231,0,0,...,0,1,0,0,0,0,0,0,1,1
101341,-0.718924,-0.528345,0.563312,-0.386847,0.733813,-0.076635,0,0.455258,0,0,...,0,1,0,0,0,0,0,0,1,1
101342,-0.888484,-0.791589,0.455777,-0.377021,0.792969,0.160931,0,0.434726,0,0,...,0,1,0,0,0,0,0,0,1,1
101343,-0.621401,-0.580418,0.344272,-0.507146,0.742285,-0.188879,0,0.451231,0,0,...,0,1,0,0,0,0,0,0,1,1


## Hyperparameter Tuning

In [3]:
df_hyper_tuning = pd.DataFrame(columns=['loss', 'alpha', 'eta0', 'tol', 'learning_rate', 'oversampling_method', 'roc_auc'])
df_hyper_tuning

Unnamed: 0,loss,alpha,eta0,tol,learning_rate,oversampling_method,roc_auc


In [4]:
from sklearn.model_selection import cross_val_score
from skopt import gp_minimize
from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args
import numpy as np
import sys
# Define the search space
search_space = [
    Categorical(['hinge', 'log_loss', 'modified_huber', 'squared_hinge', 'perceptron'], name='loss'),
    Real(0.0001, 1000, name='alpha'),
    Real(0.0, 1000.0, name='eta0'),
    Real(0.0001, 0.1, name='tol'),
    Categorical(['constant', 'optimal', 'invscaling', 'adaptive'], name='learning_rate'),
    Categorical(['none', 'ada'], name='oversampling_method')
]

# Define your objective function (e.g., maximizing accuracy)
@use_named_args(search_space)
def objective_function(loss, alpha, eta0, tol, learning_rate, oversampling_method):
    print("================")
    print("Configuration:")
    print("Loss:", loss)
    print("Tolerance:", tol)
    print("Alpha:", alpha)
    print("Eta0:", eta0)
    print("Learning Rate:", learning_rate)
    print("Oversampling Method:", oversampling_method)
    print("----------------")
    try:
        if oversampling_method == 'none':
            X = loans_train_df.loc[:, loans_train_df.columns != "loan_status"]
            y = loans_train_df["loan_status"]
        elif oversampling_method == 'ada':
            X = loans_train_ada_df.loc[:, loans_train_ada_df.columns != "loan_status"]
            y = loans_train_ada_df["loan_status"]
            
        model = SGDClassifier(class_weight='balanced', loss=loss, alpha=alpha, eta0=eta0, max_iter=200, tol=tol, learning_rate=learning_rate)
        roc_auc = cross_val_score(model, X, y, cv=3, scoring='roc_auc').mean()

        print("Results:", -roc_auc)
        print("================")
        df_hyper_tuning.loc[len(df_hyper_tuning.index)] = [loss, alpha, eta0, tol, learning_rate, oversampling_method, roc_auc] 
        return -roc_auc
    except:
        print("Invalid Config")
        return 100000
        

# Perform Bayesian Optimization
res = gp_minimize(objective_function, search_space, n_calls=500)

# Print best parameters
print("Best parameters:", res.x)


Configuration:
Loss: squared_hinge
Tolerance: 0.08364950486033416
Alpha: 648.6285423473726
Eta0: 925.5750661239806
Learning Rate: adaptive
Oversampling Method: ada
----------------
Results: -0.7422939402914123
Configuration:
Loss: perceptron
Tolerance: 0.07394704287510731
Alpha: 587.7001303535353
Eta0: 167.91470420395706
Learning Rate: constant
Oversampling Method: ada
----------------
Results: -0.47858382884937
Configuration:
Loss: modified_huber
Tolerance: 0.02028770547596246
Alpha: 435.7963131606108
Eta0: 405.8598667328914
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.742482746242144
Configuration:
Loss: modified_huber
Tolerance: 0.0653803721124305
Alpha: 142.78021024941773
Eta0: 438.1989593514415
Learning Rate: invscaling
Oversampling Method: none
----------------
Results: -0.5157701163398083
Configuration:
Loss: squared_hinge
Tolerance: 0.06910029489160224
Alpha: 72.82615108911891
Eta0: 848.2547817191397
Learning Rate: adaptive
Oversampling Method:



Results: -0.7473953566026634
Configuration:
Loss: modified_huber
Tolerance: 0.1
Alpha: 0.0001
Eta0: 950.8666565204279
Learning Rate: optimal
Oversampling Method: none
----------------
Results: -0.8368767337637729
Configuration:
Loss: modified_huber
Tolerance: 0.1
Alpha: 0.0001
Eta0: 782.4640024980606
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.8595042954703124
Configuration:
Loss: squared_hinge
Tolerance: 0.0001
Alpha: 0.0001
Eta0: 255.7934730574607
Learning Rate: adaptive
Oversampling Method: ada
----------------




Results: -0.7957588605073259
Configuration:
Loss: modified_huber
Tolerance: 0.1
Alpha: 0.0001
Eta0: 356.6193255731104
Learning Rate: optimal
Oversampling Method: none
----------------
Results: -0.8349795538714458
Configuration:
Loss: perceptron
Tolerance: 0.1
Alpha: 0.0001
Eta0: 36.97353858191936
Learning Rate: constant
Oversampling Method: none
----------------
Results: -0.7535623064072978
Configuration:
Loss: modified_huber
Tolerance: 0.099999901565862
Alpha: 1000.0
Eta0: 498.44798359425045
Learning Rate: adaptive
Oversampling Method: ada
----------------
Results: -0.7403323241102996
Configuration:
Loss: squared_hinge
Tolerance: 0.1
Alpha: 1000.0
Eta0: 0.0
Learning Rate: adaptive
Oversampling Method: ada
----------------
Invalid Config
Configuration:
Loss: hinge
Tolerance: 0.09178089774483214
Alpha: 919.7629488827185
Eta0: 249.43452177913156
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7419059037326777
Configuration:
Loss: hinge
Tolerance: 0.09183977



Results: -0.6463910038252987
Configuration:
Loss: log_loss
Tolerance: 0.020269857015177195
Alpha: 995.9467141802196
Eta0: 3.4899704979771387
Learning Rate: optimal
Oversampling Method: none
----------------
Results: -0.7434291710416078
Configuration:
Loss: hinge
Tolerance: 0.09680814050089154
Alpha: 965.0752838499185
Eta0: 248.2566399723032
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7433662681974228
Configuration:
Loss: modified_huber
Tolerance: 0.03753844776734414
Alpha: 243.96887562066996
Eta0: 995.5377483196107
Learning Rate: adaptive
Oversampling Method: ada
----------------
Results: -0.7427046148083957
Configuration:
Loss: hinge
Tolerance: 0.0949353079442138
Alpha: 984.0405577028181
Eta0: 247.12095205709576
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7438473041707979
Configuration:
Loss: hinge
Tolerance: 0.024145695727348544
Alpha: 250.9007777846847
Eta0: 993.614052658057
Learning Rate: constant
Oversampling Me



Results: -0.7489840027865142
Configuration:
Loss: squared_hinge
Tolerance: 0.07335619438264815
Alpha: 650.667720960686
Eta0: 250.18194839400186
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7420070033457494
Configuration:
Loss: squared_hinge
Tolerance: 0.1
Alpha: 786.6657806621564
Eta0: 250.04576816238898
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7438260595150156
Configuration:
Loss: hinge
Tolerance: 0.0852750109616934
Alpha: 908.4114472463044
Eta0: 248.30912294697697
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7440347413745266
Configuration:
Loss: squared_hinge
Tolerance: 0.09992573507762717
Alpha: 684.9908413550412
Eta0: 250.09502850325345
Learning Rate: adaptive
Oversampling Method: ada
----------------
Results: -0.742974045458343
Configuration:
Loss: squared_hinge
Tolerance: 0.1
Alpha: 775.9898284936661
Eta0: 248.56712840777212
Learning Rate: adaptive
Oversampling Method: none
-



Results: -0.7169425857178324
Configuration:
Loss: hinge
Tolerance: 0.0922977632520786
Alpha: 607.9748208007594
Eta0: 243.2976514895996
Learning Rate: adaptive
Oversampling Method: ada
----------------
Results: -0.7442624576471277
Configuration:
Loss: hinge
Tolerance: 0.09652678097337607
Alpha: 973.1905330351977
Eta0: 243.47680222238924
Learning Rate: adaptive
Oversampling Method: ada
----------------
Results: -0.7428493426001325
Configuration:
Loss: squared_hinge
Tolerance: 0.054718196841022436
Alpha: 14.059612678724546
Eta0: 277.3389231361797
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7490354498628807
Configuration:
Loss: squared_hinge
Tolerance: 0.05876942841382965
Alpha: 216.23444862440275
Eta0: 277.5516903351403
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7437978161354231
Configuration:
Loss: hinge
Tolerance: 0.1
Alpha: 1000.0
Eta0: 243.12750579784486
Learning Rate: adaptive
Oversampling Method: ada
------------



Results: -0.7374184777755666
Configuration:
Loss: squared_hinge
Tolerance: 0.058911497987764204
Alpha: 91.61214938626767
Eta0: 276.5223587890364
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7442269328590898
Configuration:
Loss: squared_hinge
Tolerance: 0.0429293864483593
Alpha: 597.5371755403534
Eta0: 276.1933250374264
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.743632504378863
Configuration:
Loss: squared_hinge
Tolerance: 0.06730131722897484
Alpha: 127.32721745629689
Eta0: 276.3675538417552
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7441224968031803
Configuration:
Loss: squared_hinge
Tolerance: 0.0001
Alpha: 0.0001
Eta0: 333.112178485021
Learning Rate: adaptive
Oversampling Method: ada
----------------




Results: -0.7329618248932296
Configuration:
Loss: modified_huber
Tolerance: 0.0432029728191406
Alpha: 0.0001
Eta0: 991.2803476848388
Learning Rate: optimal
Oversampling Method: none
----------------
Results: -0.8375046504689442
Configuration:
Loss: hinge
Tolerance: 0.071958980695878
Alpha: 662.6054807440204
Eta0: 243.94489664976186
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7433594425411313
Configuration:
Loss: squared_hinge
Tolerance: 0.0058902827661823365
Alpha: 37.93934547910026
Eta0: 295.00935372592323
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7454971853548593
Configuration:
Loss: log_loss
Tolerance: 0.1
Alpha: 17.566693588772075
Eta0: 28.984215627796445
Learning Rate: optimal
Oversampling Method: none
----------------
Results: -0.7443028292027606
Configuration:
Loss: hinge
Tolerance: 0.08620247577991969
Alpha: 984.645040304702
Eta0: 243.22548536007483
Learning Rate: adaptive
Oversampling Method: ada
---------



Results: -0.6623558397291341
Configuration:
Loss: squared_hinge
Tolerance: 0.055953241329148785
Alpha: 129.20303786523058
Eta0: 276.2927833228129
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7440195214934339
Configuration:
Loss: squared_hinge
Tolerance: 0.07718396469236047
Alpha: 975.1587710281846
Eta0: 245.96829682929769
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7458345946109878
Configuration:
Loss: modified_huber
Tolerance: 0.0374669239637997
Alpha: 0.0001
Eta0: 706.9545329645308
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.8594987646442815
Configuration:
Loss: hinge
Tolerance: 0.07586369399938928
Alpha: 222.14466349145803
Eta0: 30.31770697489462
Learning Rate: invscaling
Oversampling Method: ada
----------------
Results: -0.6352994506360676
Configuration:
Loss: hinge
Tolerance: 0.05806211957130369
Alpha: 941.4139052866715
Eta0: 243.843881918051
Learning Rate: adaptive
Oversamplin



Results: -0.8426441247740563
Configuration:
Loss: hinge
Tolerance: 0.09109701524108813
Alpha: 896.9917097458627
Eta0: 244.41893902687124
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.7442757273076724
Configuration:
Loss: log_loss
Tolerance: 0.02499185027450345
Alpha: 47.31755665261662
Eta0: 416.05881929935873
Learning Rate: optimal
Oversampling Method: none
----------------
Results: -0.7431588064355562
Configuration:
Loss: log_loss
Tolerance: 0.0001
Alpha: 308.5403998913025
Eta0: 432.45031656199325
Learning Rate: optimal
Oversampling Method: ada
----------------
Results: -0.7435217085927762
Configuration:
Loss: modified_huber
Tolerance: 0.04328565992473496
Alpha: 393.15332876159266
Eta0: 350.02180264423424
Learning Rate: optimal
Oversampling Method: none
----------------
Results: -0.7381516790936953
Configuration:
Loss: log_loss
Tolerance: 0.026439497855200344
Alpha: 301.99679673410435
Eta0: 534.7960943995979
Learning Rate: optimal
Oversampling Method: 



Results: -0.7448914593369412
Configuration:
Loss: modified_huber
Tolerance: 0.0001
Alpha: 0.0001
Eta0: 449.99458392130475
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.859468534654393
Configuration:
Loss: hinge
Tolerance: 0.07013598790420741
Alpha: 0.0001
Eta0: 446.1251136949734
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.8571331479716866
Configuration:
Loss: hinge
Tolerance: 0.09203923388235699
Alpha: 758.8104580748198
Eta0: 243.27446459831657
Learning Rate: adaptive
Oversampling Method: ada
----------------
Results: -0.7394110377798717
Configuration:
Loss: modified_huber
Tolerance: 0.0001
Alpha: 0.0001
Eta0: 445.9752942557845
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.8594226314080675
Configuration:
Loss: hinge
Tolerance: 0.019265736253231483
Alpha: 0.0001
Eta0: 451.00268107847444
Learning Rate: adaptive
Oversampling Method: none
----------------
Results: -0.8570905519513952
Config

In [5]:
df_hyper_tuning.sort_values(by=['roc_auc'], ascending=False)

Unnamed: 0,loss,alpha,eta0,tol,learning_rate,oversampling_method,roc_auc
343,modified_huber,0.000100,708.315758,0.063716,adaptive,none,0.859518
486,modified_huber,0.000100,443.726775,0.003077,adaptive,none,0.859512
377,modified_huber,0.000100,447.146540,0.036661,adaptive,none,0.859511
456,modified_huber,0.000100,445.887255,0.040895,adaptive,none,0.859509
487,modified_huber,0.000100,440.622031,0.020018,adaptive,none,0.859507
...,...,...,...,...,...,...,...
14,log_loss,1000.000000,498.186425,0.100000,constant,ada,0.502275
78,hinge,250.900778,993.614053,0.024146,constant,none,0.500000
227,perceptron,549.303444,9.478125,0.100000,constant,none,0.492087
1,perceptron,587.700130,167.914704,0.073947,constant,ada,0.478584


In [6]:
df_hyper_tuning.to_csv('hyper_tuning/sgd_hyper_tuning.csv', index=False, header=True, encoding='utf-8')

## Results

In [7]:
clf = SGDClassifier(class_weight='balanced', loss=res.x[0], alpha=res.x[1], eta0=res.x[2], max_iter=200, tol=res.x[3], learning_rate=res.x[4])

In [8]:
if res.x[5] == 'none':
    X = loans_train_df.loc[:, loans_train_df.columns != "loan_status"]
    y = loans_train_df["loan_status"]
elif res.x[5] == 'ada':
    X = loans_train_ada_df.loc[:, loans_train_ada_df.columns != "loan_status"]
    y = loans_train_ada_df["loan_status"]
    
clf.fit(X,y)

# Calculate the ROC AUC score
roc_auc = cross_val_score(clf, X, y, cv=3, scoring='roc_auc').mean()
print("Validation AUC:", roc_auc)

Validation AUC: 0.8595204701587827


In [9]:
from joblib import dump
clf.fit(X,y)
dump(clf, './outputs/sgd_model.joblib')

['./outputs/sgd_model.joblib']

# Fitting into Test Data

In [10]:
##Import Testing Dataset
loans_test_df = pd.read_csv('./outputs/cleaned_loans_test.csv')
loans_test_df

Unnamed: 0,id,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length,PERSON_HOME_OWNERSHIP_MORTGAGE,...,LOAN_GRADE_B,LOAN_GRADE_C,LOAN_GRADE_D,LOAN_GRADE_E,LOAN_GRADE_F,LOAN_GRADE_G,CB_PERSON_CRED_HIST_LENGTH_11_17,CB_PERSON_CRED_HIST_LENGTH_18_above,CB_PERSON_CRED_HIST_LENGTH_5_10,CB_PERSON_CRED_HIST_LENGTH_5_below
0,58645,-0.755638,0.404383,0.370898,2.836600,0.733635,2.189522,0,-1.080800,0,...,0,0,0,0,1,0,0,0,0,1
1,58646,-0.257331,1.127233,0.520621,0.140622,0.584177,-0.646041,1,0.179926,1,...,0,1,0,0,0,0,0,0,0,1
2,58647,-0.257331,-1.418731,0.479379,-0.937769,0.793331,-0.318861,1,-1.080800,0,...,0,0,0,1,0,0,0,0,0,1
3,58648,0.905387,-0.300610,0.430599,-0.398573,0.340882,-0.209801,0,1.197778,0,...,0,0,0,0,0,0,0,0,1,0
4,58649,-0.257331,1.259932,0.587859,1.039281,0.757634,-0.100741,1,0.179926,1,...,0,0,1,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39093,97738,-0.921741,-1.332883,0.293930,-1.117500,0.445950,-0.646041,0,0.179926,1,...,1,0,0,0,0,0,0,0,0,1
39094,97739,-0.921741,-0.389963,0.520621,-0.398573,0.073304,-0.100741,0,-0.343323,1,...,0,0,0,0,0,0,0,0,0,1
39095,97740,3.895232,0.098465,0.000000,1.039281,0.224164,0.989861,0,3.513102,1,...,0,0,0,0,0,0,0,1,0,0
39096,97741,-0.921741,-1.019656,0.430599,0.859550,0.727502,2.516703,1,0.179926,1,...,0,0,1,0,0,0,0,0,0,1


In [11]:
X_test = loans_test_df.loc[:, loans_test_df.columns != "id"]
X_test

Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length,PERSON_HOME_OWNERSHIP_MORTGAGE,PERSON_HOME_OWNERSHIP_OTHER,...,LOAN_GRADE_B,LOAN_GRADE_C,LOAN_GRADE_D,LOAN_GRADE_E,LOAN_GRADE_F,LOAN_GRADE_G,CB_PERSON_CRED_HIST_LENGTH_11_17,CB_PERSON_CRED_HIST_LENGTH_18_above,CB_PERSON_CRED_HIST_LENGTH_5_10,CB_PERSON_CRED_HIST_LENGTH_5_below
0,-0.755638,0.404383,0.370898,2.836600,0.733635,2.189522,0,-1.080800,0,0,...,0,0,0,0,1,0,0,0,0,1
1,-0.257331,1.127233,0.520621,0.140622,0.584177,-0.646041,1,0.179926,1,0,...,0,1,0,0,0,0,0,0,0,1
2,-0.257331,-1.418731,0.479379,-0.937769,0.793331,-0.318861,1,-1.080800,0,0,...,0,0,0,1,0,0,0,0,0,1
3,0.905387,-0.300610,0.430599,-0.398573,0.340882,-0.209801,0,1.197778,0,0,...,0,0,0,0,0,0,0,0,1,0
4,-0.257331,1.259932,0.587859,1.039281,0.757634,-0.100741,1,0.179926,1,0,...,0,0,1,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39093,-0.921741,-1.332883,0.293930,-1.117500,0.445950,-0.646041,0,0.179926,1,0,...,1,0,0,0,0,0,0,0,0,1
39094,-0.921741,-0.389963,0.520621,-0.398573,0.073304,-0.100741,0,-0.343323,1,0,...,0,0,0,0,0,0,0,0,0,1
39095,3.895232,0.098465,0.000000,1.039281,0.224164,0.989861,0,3.513102,1,0,...,0,0,0,0,0,0,0,1,0,0
39096,-0.921741,-1.019656,0.430599,0.859550,0.727502,2.516703,1,0.179926,1,0,...,0,0,1,0,0,0,0,0,0,1


In [12]:
y_pred = clf.predict(X_test)

In [13]:
loans_predictions_df = loans_test_df["id"].copy(deep=True)
loans_predictions_df = loans_predictions_df.to_frame()
loans_predictions_df.insert(1, 'loan_status', y_pred, True)

In [14]:
loans_predictions_df

Unnamed: 0,id,loan_status
0,58645,1
1,58646,0
2,58647,1
3,58648,0
4,58649,1
...,...,...
39093,97738,0
39094,97739,0
39095,97740,0
39096,97741,1


In [15]:
loans_predictions_df.to_csv('predictions/sgd_predictions.csv', index=False, header=True, encoding='utf-8')