**Run the following two cells before you begin.**

In [58]:
%autosave 10

Autosaving every 10 seconds


In [59]:
import pandas as pd
import numpy as np

______________________________________________________________________
**First, import your data set and define the sigmoid function.**
<details>
    <summary>Hint:</summary>
    The definition of the sigmoid is $f(x) = \frac{1}{1 + e^{-X}}$.
</details>

In [60]:
# Import the data set
df=pd.read_csv('cleaned_data.csv')

In [61]:
# Define the sigmoid function
def signumfun(a):
    return ((1)/(1+np.exp(-a)))


**Now, create a train/test split (80/20) with `PAY_1` and `LIMIT_BAL` as features and `default payment next month` as values. Use a random state of 24.**

In [62]:
# Create a train/test split
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(df[['PAY_1','LIMIT_BAL']],df['default payment next month'],test_size=0.2,random_state=24,stratify=df['default payment next month'])

______________________________________________________________________
**Next, import LogisticRegression, with the default options, but set the solver to `'liblinear'`.**

In [63]:
from sklearn.linear_model import LogisticRegression
log_reg=LogisticRegression(solver='liblinear')

______________________________________________________________________
**Now, train on the training data and obtain predicted classes, as well as class probabilities, using the testing data.**

In [64]:
# Fit the logistic regression model on training data
log_reg.fit(X_train,y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

In [65]:
# Make predictions using `.predict()`
log_reg.predict(X_test)

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [66]:
# Find class probabilities using `.predict_proba()`
y_pred_prob=log_reg.predict_proba(X_test)

______________________________________________________________________
**Then, pull out the coefficients and intercept from the trained model and manually calculate predicted probabilities. You'll need to add a column of 1s to your features, to multiply by the intercept.**

In [67]:
# Add column of 1s to features
one=np.ones((X_test.shape[0],1),dtype=int)
X_test=np.append(X_test,one,axis=1)
print(X_test.shape)
X_test

(5333, 3)


array([[     1, 180000,      1],
       [     0,  80000,      1],
       [    -2, 290000,      1],
       ...,
       [    -1, 280000,      1],
       [     0, 110000,      1],
       [     0, 100000,      1]], dtype=int64)

In [68]:
# Get coefficients and intercepts from trained model
c=log_reg.coef_
i=log_reg.intercept_
print('Coefficient:',c)
print('Intercept:',i)
coeffs=np.append(c,i)
print(coeffs)

Coefficient: [[ 8.12153890e-11 -6.82669947e-06]]
Intercept: [-6.64621198e-11]
[ 8.12153890e-11 -6.82669947e-06 -6.64621198e-11]


In [69]:
# Manually calculate predicted probabilities
Z=np.dot(coeffs,X_test.T)
print(len(Z))
a=signumfun(Z)
print(a)

5333
[0.22639049 0.36676136 0.12134625 ... 0.12881514 0.32061718 0.33566566]


______________________________________________________________________
**Next, using a threshold of `0.5`, manually calculate predicted classes. Compare this to the class predictions output by scikit-learn.**

In [70]:
# Manually calculate predicted classes
pred_prob=[]
for i in range(len(a)):
    temp=[]
    temp.append(1-a[i])
    temp.append(a[i])
    pred_prob.append(temp)
print(pred_prob)

pred_prob=np.array(pred_prob)
classes=[]
for i in range(len(pred_prob)):
    if pred_prob[i][0]>0.5:
        classes.append(0.0)
    else:
        classes.append(1.0)
classes

[[0.7736095113716848, 0.22639048862831518], [0.6332386399118541, 0.36676136008814586], [0.8786537467883828, 0.12134625321161717], [0.6332386399118541, 0.36676136008814586], [0.837318180049141, 0.16268181995085892], [0.7736095113716848, 0.22639048862831518], [0.7083679859987118, 0.2916320140012882], [0.8074626888489087, 0.1925373111510913], [0.9160111754065994, 0.08398882459340061], [0.6332386399118541, 0.36676136008814586], [0.7736095114143565, 0.2263904885856435], [0.5340805709442042, 0.4659194290557958], [0.6643343412507602, 0.3356656587492398], [0.7222676559590206, 0.2777323440409793], [0.7357519749961101, 0.26424802500388983], [0.7966261999396095, 0.20337380006039052], [0.7083679859987118, 0.2916320140012882], [0.7488092966413804, 0.25119070335861965], [0.7488092966566564, 0.25119070334334354], [0.551022033910404, 0.44897796608959595], [0.8988529299101329, 0.10114707008986708], [0.9388128981077218, 0.061187101892278196], [0.6489445196770551, 0.3510554803229448], [0.8178537930234575

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0

In [71]:
# Compare to scikit-learn's predicted classes


______________________________________________________________________
**Finally, calculate ROC AUC using both scikit-learn's predicted probabilities, and your manually predicted probabilities, and compare.**

In [74]:
# Use scikit-learn's predicted probabilities to calculate ROC AUC
from sklearn.metrics import roc_auc_score
ruc_score=roc_auc_score(y_test,y_pred_prob[:,1])
ruc_score


0.6378428337186446

In [75]:
# Use manually calculated predicted probabilities to calculate ROC AUC
ruc_score_man=roc_auc_score(y_test,pred_prob[:,1])
ruc_score_man

0.6378428337186446