In [None]:
#######################################################
#######################################################
############    COPYRIGHT - DATA SOCIETY   ############
#######################################################
#######################################################

## INTROTONEURALNETWORKS/2 BUILDINGNEURALNETWORKS/INTROTONEURALNETWORKS BUILDINGNEURALNETWORKS 1 ##

## NOTE: To run individual pieces of code, select the line of code and
##       press ctrl + enter for PCs or command + enter for Macs




In [None]:
#=================================================-
#### Slide 5: Directory settings  ####

# Locate the project folder relative to where the notebook is running:
# 'main_dir' is the directory two levels above the current working directory,
# and 'data_dir' is its "data" subfolder (kept as a plain string).
from pathlib import Path

home_dir = Path(".").resolve()       #<- absolute path of the working directory
main_dir = home_dir.parent.parent    #<- two levels up
data_dir = f"{main_dir}/data"        #<- string path to the data folder

print(main_dir)
print(data_dir)




In [None]:
#=================================================-
#### Slide 6: Loading packages  ####

# Helper packages.
import os
import matplotlib.pyplot as plt
import numpy as np                                    
import pandas as pd
import pickle
from textwrap import wrap
# Scikit-learn package for building a perceptron.
from sklearn.neural_network import MLPClassifier

# Scikit-learn package for data preprocessing.
from sklearn.preprocessing import MinMaxScaler
# Model set up, tuning and model metrics packages.
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import GridSearchCV




In [None]:
#=================================================-
#### Slide 7: Load the data  ####

# Read the credit card CSV from the data directory and preview the first rows.
credit_card_csv = f"{data_dir}/credit_card_data.csv"
credit_card = pd.read_csv(credit_card_csv)
print(credit_card.head())




In [None]:
#=================================================-
#### Slide 9: Data at first glance  ####

# List the dtype pandas assigned to each column.
column_dtypes = credit_card.dtypes
print(column_dtypes)




In [None]:
#=================================================-
#### Slide 10: Check for NAs in the dataset  ####

# Count the missing values in every column.
missing_per_column = credit_card.isna().sum()
print(missing_per_column)




In [None]:
#=================================================-
#### Slide 11: Using fillna() to handle missing values  ####

 # Fill missing values with mean 
# Impute only 'BILL_AMT1', using that column's own mean.
# Passing a single scalar to DataFrame.fillna() would write the 'BILL_AMT1'
# mean into the missing cells of *every* column; the dict form restricts the
# fill to the named column and avoids computing the means of all other columns.
bill_amt1_mean = credit_card['BILL_AMT1'].mean()
credit_card = credit_card.fillna({'BILL_AMT1': bill_amt1_mean})
# Check for NAs in 'BILL_AMT1' (expected to print 0 after the fill).
print(credit_card['BILL_AMT1'].isnull().sum())
# Drop an unnecessary identifier column.
credit_card = credit_card.drop('ID', axis = 1)




In [None]:
#=================================================-
#### Slide 15: Transform and replace categorical variables  ####

# One-hot encode the categorical predictors. Each dummy frame is prefixed with
# the lower-cased column name and its first level is dropped (drop_first).
categorical_columns = ['SEX', 'EDUCATION', 'MARRIAGE']
sex, education, marriage = (
    pd.get_dummies(credit_card[column], prefix = column.lower(), drop_first = True)
    for column in categorical_columns
)

# Replace the original categorical columns with their dummy encodings:
# remove `SEX`, `EDUCATION`, `MARRIAGE` and append the three dummy frames.
credit_card = pd.concat(
    [credit_card.drop(columns = categorical_columns), sex, education, marriage],
    axis = 1,
)
print(credit_card.head())




In [None]:
#=================================================-
#### Slide 16: Data prep: split  ####

# Name of the column we are trying to predict.
target_column = 'default_payment_next_month'

# Target vector and predictor matrix (every column except the target).
y = credit_card[target_column]
X = credit_card.drop(columns = [target_column])

# 70 / 30 train-test split with a fixed seed; predictors are passed as a NumPy
# array, while the target is passed as-is.
X_train, X_test, y_train, y_test = train_test_split(
    X.values, y, test_size = .3, random_state = 1
)

print("Train shape:", X_train.shape, "Test shape:", X_test.shape)




In [None]:
#=================================================-
#### Slide 17: Data prep: scale with MinMaxScaler  ####

# Rescale every feature with MinMaxScaler (default output range: 0 to 1).
# The scaler is fitted on the training predictors only; the same fitted
# scaler is then applied to both the train and the test predictors.
min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler.fit(X_train)
X_train_scaled = min_max_scaler.transform(X_train)
X_test_scaled = min_max_scaler.transform(X_test)


#######################################################
####  CONGRATULATIONS ON COMPLETING THIS MODULE!   ####
#######################################################


In [None]:
#######################################################
#######################################################
############    COPYRIGHT - DATA SOCIETY   ############
#######################################################
#######################################################

## INTROTONEURALNETWORKS/2 BUILDINGNEURALNETWORKS/INTROTONEURALNETWORKS BUILDINGNEURALNETWORKS 2 ##

## NOTE: To run individual pieces of code, select the line of code and
##       press ctrl + enter for PCs or command + enter for Macs




In [None]:
#=================================================-
#### Slide 2: Build the model architecture  ####

# Build neural network model.
# `hidden_layer_sizes` is documented as a sequence with one entry per hidden
# layer. `(64)` is just the integer 64 in parentheses; the trailing comma
# makes it a real one-element tuple, i.e. a single hidden layer of 64 neurons.
nn = MLPClassifier(hidden_layer_sizes = (64,), #<- one hidden layer with 64 neurons
                   random_state = 1)           #<- set seed to 1





In [None]:
#=================================================-
#### Slide 3: Fit the model to training data  ####

# Train the network on the scaled training predictors and their labels.
# The value returned by `fit` is kept as `fit_nn` and used by the later cells.
fit_nn = nn.fit(X = X_train_scaled, y = y_train)




In [None]:
#=================================================-
#### Slide 4: Inspect accuracy of training model  ####

# Mean accuracy of the fitted model on the data it was trained on.
acc_train_nn = fit_nn.score(X = X_train_scaled, y = y_train)
print("Train Accuracy:", acc_train_nn)




In [None]:
#=================================================-
#### Slide 10: Predict on test data  ####

# Class labels predicted for the scaled test predictors.
predicted_values_nn = fit_nn.predict(X_test_scaled)
print(predicted_values_nn)

# Share of test observations whose predicted label matches the true label.
test_accuracy_score = metrics.accuracy_score(y_true = y_test,
                                             y_pred = predicted_values_nn)
print("Accuracy on test data: ", test_accuracy_score)




In [None]:
#=================================================-
#### Slide 11: Confusion matrix   ####

# Confusion matrix for the test set: true labels vs. predicted labels.
conf_matrix_test = metrics.confusion_matrix(y_true = y_test,
                                            y_pred = predicted_values_nn)
print(conf_matrix_test)




In [None]:
#=================================================-
#### Slide 19: Classification report  ####

# Display names for the two target classes, in label order.
target_names = ['default_payment_0', 'default_payment_1']

# Build the classification report for the test set and print it.
class_report = metrics.classification_report(
    y_test, predicted_values_nn, target_names = target_names
)
print(class_report)




In [None]:
#=================================================-
#### Slide 22: Getting probabilities instead of class labels  ####

# Ask the model for class probabilities rather than hard labels:
# one row per test observation, one column per class.
test_probabilities = fit_nn.predict_proba(X_test_scaled)

# Keep only column 1, i.e. the probability assigned to the second class.
second_class_column = 1
test_predictions = test_probabilities[:, second_class_column]




In [None]:
#=================================================-
#### Slide 23: Computing FPR, TPR, and threshold  ####

# Compute the ROC curve points from the true test labels and the predicted
# probabilities: false positive rates, true positive rates and the thresholds.
fpr, tpr, threshold = metrics.roc_curve(y_true = y_test,
                                        y_score = test_predictions)

print("False positive: ", fpr)
print("True positive: ", tpr)
print("Threshold: ", threshold)




In [None]:
#=================================================-
#### Slide 24: Computing AUC  ####

# Area under the ROC curve, integrating TPR (y) over FPR (x).
auc = metrics.auc(x = fpr, y = tpr)
print("Area under the ROC curve: ", auc)




In [None]:
#=================================================-
#### Slide 25: Putting it all together: ROC plot  ####

# Draw the ROC curve with its AUC in the legend, plus a dashed red diagonal.
auc_label = 'AUC = {:.2f}'.format(auc)

plt.plot(fpr, tpr, 'b', label = auc_label)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1], 'r--')   #<- diagonal from (0, 0) to (1, 1)

plt.title('Receiver Operator Characteristic')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()




In [None]:
#=================================================-
#### Slide 26: Putting it all together: ROC plot (cont'd)  ####

# ROC plot: model curve in blue (legend shows the AUC to two decimals)
# and a dashed red diagonal drawn after the legend is created.
roc_legend_text = f'AUC = {auc:.2f}'
diagonal_endpoints = [0, 1]

plt.title('Receiver Operator Characteristic')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.plot(fpr, tpr, 'b', label = roc_legend_text)
plt.legend(loc = 'lower right')
plt.plot(diagonal_endpoints, diagonal_endpoints, 'r--')
plt.show()


#######################################################
####  CONGRATULATIONS ON COMPLETING THIS MODULE!   ####
#######################################################


In [None]:
#######################################################
#######################################################
############    COPYRIGHT - DATA SOCIETY   ############
#######################################################
#######################################################

## INTROTONEURALNETWORKS/2 BUILDINGNEURALNETWORKS/INTROTONEURALNETWORKS BUILDINGNEURALNETWORKS 3 ##

## NOTE: To run individual pieces of code, select the line of code and
##       press ctrl + enter for PCs or command + enter for Macs




In [None]:
#=================================================-
#### Slide 12: Fitting & visualizing training history: accuracy   ####

N_EPOCHS = 25                     #<- number of epochs
N_CLASSES = np.unique(y_train)    #<- array of the distinct class labels in the target
                                  #   (the labels themselves, not how many there are)

# Build neural network model by creating a classifier:
# one hidden layer with 64 neurons and a fixed random state.
# `(64,)` (with the trailing comma) is a one-element tuple; `(64)` is just an int.
mlp = MLPClassifier(hidden_layer_sizes = (64,), random_state = 1)

scores_train = [] #<- accuracy on the training set after each epoch
scores_test = []  #<- accuracy on the test set after each epoch

# Train one epoch at a time so accuracy can be recorded after every epoch.
# `classes` passes the full set of labels to `partial_fit`.
# `epoch` counts from 1 so it equals N_EPOCHS once the loop has finished.
for epoch in range(1, N_EPOCHS + 1):
    mlp_fit = mlp.partial_fit(X_train_scaled, y_train, classes=N_CLASSES)

    # Compute score for train data.
    scores_train.append(mlp.score(X_train_scaled, y_train))

    # Compute score for test data.
    scores_test.append(mlp.score(X_test_scaled, y_test))





In [None]:
#=================================================-
#### Slide 13: Inspect model accuracy and loss  ####

# Figure 1: accuracy per epoch for the train and test sets.
for history, line_color, series_name in (
    (scores_train, 'green', 'Train'),
    (scores_test, 'magenta', 'Test'),
):
    plt.plot(history, color=line_color, alpha=0.8, label=series_name)
plt.xlabel('Epochs')
plt.title("Accuracy over epochs", fontsize=14)
plt.legend(loc='upper left')
plt.show()

# Figure 2: the loss values recorded by the model during training.
plt.plot(mlp.loss_curve_)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title("Loss over epochs", fontsize=14)
plt.show()




In [None]:
#=================================================-
#### Slide 14: Inspect model accuracy and loss (cont'd)  ####

# Accuracy per epoch: training history in green, test history in magenta.
shared_line_style = dict(alpha=0.8)
plt.plot(scores_train, color='green', label='Train', **shared_line_style)
plt.plot(scores_test, color='magenta', label='Test', **shared_line_style)
plt.legend(loc='upper left')
plt.xlabel('Epochs')
plt.title("Accuracy over epochs", fontsize=14)
plt.show()




In [None]:
#=================================================-
#### Slide 17: Class-imbalanced dataset challenges  ####

# Count how many observations fall into each class of the target variable.
target_class_counts = credit_card['default_payment_next_month'].value_counts()
print(target_class_counts)




In [None]:
#=================================================-
#### Slide 20: Balancing target with SMOTE (cont'd)  ####

from imblearn.over_sampling import SMOTE

# Let's initialize SMOTE object.
# SMOTE generates synthetic samples at random, so the seed is fixed (to 1, as
# for the other seeded steps in this notebook) to make the resampled training
# set identical on every run.
smote = SMOTE(random_state = 1)

# We can now fit the sampling method to our train data and labels.
# Only the training split is resampled here.
X_train_sm, y_train_sm = smote.fit_resample(X_train_scaled, y_train)

# Class counts of the resampled training labels.
print(y_train_sm.value_counts())




In [None]:
#=================================================-
#### Slide 23: Exercise  ####




#######################################################
####  CONGRATULATIONS ON COMPLETING THIS MODULE!   ####
#######################################################
