# Student Loan Risk with Deep Learning

In [1601]:
# Imports
import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

---

## Prepare the data to be used on a neural network model

### Step 1: Read the `student_loans.csv` file into a Pandas DataFrame. Review the DataFrame, looking for columns that could eventually define your features and target variables.   

In [1602]:
# Read the csv into a Pandas DataFrame
file_path = "https://static.bc-edx.com/mbc/ai/m6/datasets/student_loans.csv"
df = pd.read_csv(file_path)

# Review the DataFrame
df.head()

Unnamed: 0,payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score,credit_ranking
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [1603]:
# Review the data types associated with the columns
df.dtypes

payment_history           float64
location_parameter        float64
stem_degree_score         float64
gpa_ranking               float64
alumni_success            float64
study_major_code          float64
time_to_completion        float64
finance_workshop_score    float64
cohort_ranking            float64
total_loan_score          float64
financial_aid_score       float64
credit_ranking              int64
dtype: object

### Step 2: Using the preprocessed data, create the features (`X`) and target (`y`) datasets. The target dataset should be defined by the preprocessed DataFrame column “credit_ranking”. The remaining columns should define the features dataset.

In [1604]:
# Define the target set y using the credit_ranking column
y = df["credit_ranking"].values

# Display a sample of y
y

# X = df.drop(columns=["quality"]).values
# y = df["quality"].values

array([5, 5, 5, ..., 6, 5, 6], dtype=int64)

In [1605]:
# Define features set X by selecting all columns but credit_ranking
# X = df.drop(columns="credit_ranking").copy()
X = df.drop(columns=["credit_ranking"]).values
# Review the features DataFrame
X


array([[ 7.4  ,  0.7  ,  0.   , ...,  3.51 ,  0.56 ,  9.4  ],
       [ 7.8  ,  0.88 ,  0.   , ...,  3.2  ,  0.68 ,  9.8  ],
       [ 7.8  ,  0.76 ,  0.04 , ...,  3.26 ,  0.65 ,  9.8  ],
       ...,
       [ 6.3  ,  0.51 ,  0.13 , ...,  3.42 ,  0.75 , 11.   ],
       [ 5.9  ,  0.645,  0.12 , ...,  3.57 ,  0.71 , 10.2  ],
       [ 6.   ,  0.31 ,  0.47 , ...,  3.39 ,  0.66 , 11.   ]])

### Step 3: Split the features and target sets into training and testing datasets.


In [1606]:
# Split the preprocessed data into a training and testing dataset
# Assign the function a random_state equal to 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y, shuffle=True)



### Step 4: Use scikit-learn's `StandardScaler` to scale the features data.

In [1607]:
# Create a StandardScaler instance
X_scaler = StandardScaler()
X_scaler.fit(X_train)

# Fit the scaler to the features training dataset
X_train_scaled = X_scaler.transform(X_train)

# Fit the scaler to the features training dataset
X_test_scaled = X_scaler.transform(X_test)


---

## Compile and Evaluate a Model Using a Neural Network

### Step 1: Create a deep neural network by assigning the number of input features, the number of layers, and the number of neurons on each layer using Tensorflow’s Keras.

> **Hint** You can start with a two-layer deep neural network model that uses the `relu` activation function for both layers.


In [1608]:
# Define the the number of inputs (features) to the model
number_inputs = 11

# Review the number of features
number_inputs

11

In [1609]:
# Define the number of neurons in the output layer
number_outputs = 1

In [1610]:
# Define the number of hidden nodes for the first hidden layer
number_inputs = 11
number_hidden_nodes = 6

# Review the number hidden nodes in the first layer
First_Layer = Dense(units=number_hidden_nodes, activation="relu", input_dim=number_inputs)
First_Layer

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Dense name=dense_218, built=False>

In [1611]:
# Define the number of hidden nodes for the second hidden layer
number_inputs2 = 6
number_hidden_nodes2 = 3

# Review the number hidden nodes in the second layer
Second_Layer = Dense(units=number_hidden_nodes2, activation="relu", input_dim=number_inputs2)
Second_Layer

<Dense name=dense_219, built=False>

In [1612]:
# Create the Sequential model instance
NN_Mod = Sequential()
NN_Mod

<Sequential name=sequential_76, built=False>

In [1613]:
# Add the first hidden layer
NN_Mod.add(First_Layer)
NN_Mod

<Sequential name=sequential_76, built=True>

In [1614]:
# Add the second hidden layer
NN_Mod.add(Second_Layer)
NN_Mod

<Sequential name=sequential_76, built=True>

In [1615]:
number_classes = 1

# Add the output layer to the model specifying the number of output neurons and activation function
NN_Mod.add(Dense(number_classes, activation="linear"))
NN_Mod

<Sequential name=sequential_76, built=True>

In [1616]:
# Display the Sequential model summary
NN_Mod.summary()

### Step 2: Compile and fit the model using the `mse` loss function, the `adam` optimizer, and the `mse` evaluation metric.


In [1617]:
# Compile the Sequential model
NN_Mod.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

In [1618]:
# Fit the model using 50 epochs and the training data
NN_Mod.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50


[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 33.6010 - mse: 33.6010
Epoch 2/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 31.6119 - mse: 31.6119
Epoch 3/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 29.5693 - mse: 29.5693
Epoch 4/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 28.0847 - mse: 28.0847
Epoch 5/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 25.4527 - mse: 25.4527
Epoch 6/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 22.6323 - mse: 22.6323
Epoch 7/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 19.5208 - mse: 19.5208
Epoch 8/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 16.0910 - mse: 16.0910
Epoch 9/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss:

<keras.src.callbacks.history.History at 0x24d0dfdee90>

### Step 3: Evaluate the model using the test data to determine the model’s loss and accuracy.


In [1619]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = NN_Mod.evaluate(X_test_scaled, y_test, verbose=2)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

13/13 - 0s - 11ms/step - loss: 0.8228 - mse: 0.8228
Loss: 0.8227700591087341, Accuracy: 0.8227700591087341


### Step 4: Save and export your model to an HDF5 file, and name the file `student_loans.h5`.


In [1620]:
# Set the model's file path
file_path = Path("./student_loans.keras")

# Export your model to a HDF5 file
NN_Mod.save(file_path)

---
## Predict Loan Repayment Success by Using your Neural Network Model

### Step 1: Reload your saved model.

In [1621]:
# Set the model's file path
file_path = Path("./student_loans.keras")

# Load the model to a new object
nn_imported = tf.keras.models.load_model(file_path)

### Step 2: Make predictions on the testing data.

In [1622]:
# Make predictions on the testing data
predictions = nn_imported.predict(X_test_scaled).round().astype("int32")
predictions

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


array([[5],
       [5],
       [7],
       [6],
       [4],
       [6],
       [5],
       [5],
       [5],
       [5],
       [5],
       [5],
       [6],
       [6],
       [5],
       [5],
       [6],
       [6],
       [8],
       [7],
       [5],
       [6],
       [5],
       [4],
       [5],
       [5],
       [5],
       [5],
       [6],
       [5],
       [6],
       [5],
       [6],
       [5],
       [5],
       [5],
       [6],
       [6],
       [4],
       [5],
       [5],
       [6],
       [7],
       [5],
       [5],
       [5],
       [6],
       [6],
       [6],
       [7],
       [5],
       [5],
       [7],
       [5],
       [8],
       [5],
       [5],
       [7],
       [5],
       [6],
       [6],
       [6],
       [5],
       [7],
       [6],
       [5],
       [5],
       [7],
       [6],
       [6],
       [5],
       [7],
       [5],
       [5],
       [5],
       [5],
       [6],
       [6],
       [7],
       [5],
       [5],
       [5],
       [5],
    

### Step 3: Create a DataFrame to compare the predictions with the actual values.

In [1623]:
# Create a DataFrame to compare the predictions with the actual values
results = pd.DataFrame({"predictions": predictions.ravel(), "actual": y_test})


### Step 4: Display a sample of the DataFrame you created in step 3.

In [1624]:
# Display sample data
results.head()

Unnamed: 0,predictions,actual
0,5,6
1,5,5
2,7,8
3,6,4
4,4,5
