## Basic Neural Networks - and Beyond...

**Data Science for Business - Spring 2025**

**Created by Aditya Deshpande and Chris Volinsky**

Let's see if neural nets can improve on our models for the DirectMarketing data set...

In [None]:
#Loading Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelBinarizer

# Installing data

[You can download the data here](https://drive.google.com/uc?export=download&id=1deEx-Ey37F7qznPlIqmaAjjkmkvBtV28).  Each record represents an individual who was targeted with a direct marketing offer.  The offer was a solicitation to make a charitable donation. You'll remember this data set from last chapter!


I've copied all of the data prep code from our last module when we analyzed this data:


In [None]:
# Load the direct-marketing records from the CSV in the working directory
df = pd.read_csv("DirectMarketing.csv")
# drop the cases where Firstdate == 0 by keeping only the rows that pass a boolean mask
df = df[df["Firstdate"] != 0]


In [None]:

# Build the modelling frame from an explicit copy of `df`. A plain
# `df_clean = df` would only alias `df`, so every column assignment below
# would also rewrite `df`, and re-running this cell would log-transform
# gavr/glast a second time.
df_clean = df.copy()

# replace gavr and glast with their log(x + 1) versions (always computed from `df`)
df_clean['gavr'] = np.log(df['gavr'] + 1)
df_clean['glast'] = np.log(df['glast'] + 1)

# make Income and rfaf2 categorical with explicitly listed levels
income_cat = pd.Categorical(df['Income'], categories=[0,1,2,3,4,5,6,7])
df_clean['Income'] = income_cat

rfaf2_cat = pd.Categorical(df['rfaf2'], categories=[1,2,3,4])
df_clean['rfaf2'] = rfaf2_cat

# one-hot encode the categorical features, dropping the first level of each
df_clean = pd.get_dummies(df_clean, columns=['rfaa2', 'pepstrfl','Income','rfaf2'],drop_first=True)

# Create a new feature 'tenure': Lastdate minus Firstdate
df_clean['tenure'] = df_clean['Lastdate'] - df_clean['Firstdate']
# TODO: maybe check that tenure is never negative

# 'recency': the largest Lastdate in the data minus each row's Lastdate
today = df_clean['Lastdate'].max()
df_clean['recency'] = today - df_clean['Lastdate']

# remove Firstdate and Lastdate now that tenure and recency are derived
df_clean = df_clean.drop(['Firstdate', 'Lastdate'], axis=1)


In [None]:
# preview the first five rows of the prepared frame
df_clean.head(n=5)


Unnamed: 0,Amount,glast,gavr,class,rfaa2_E,rfaa2_F,rfaa2_G,pepstrfl_X,Income_1,Income_2,Income_3,Income_4,Income_5,Income_6,Income_7,rfaf2_2,rfaf2_3,rfaf2_4,tenure,recency
0,0.06,3.931826,3.433987,0,False,False,True,False,False,False,True,False,False,False,False,False,False,False,100,193
1,0.16,3.044522,3.070376,1,False,False,True,True,False,True,False,False,False,False,False,False,False,True,401,100
2,0.2,1.791759,2.277267,0,True,False,False,False,False,False,False,False,False,False,False,False,False,True,93,99
3,0.13,3.258097,3.157,0,False,False,True,False,False,False,False,False,False,True,False,True,False,False,194,99
4,0.1,3.258097,2.60269,0,False,False,True,False,False,False,False,False,False,False,False,False,False,False,201,191


In [None]:
# target: the 'class' column; features: every remaining column
y = df_clean['class']
X = df_clean.drop(columns=['class'])


# ML Modeling


In [None]:
# Registry of AUC scores keyed by model name; each model section adds its entry
model_auc_scores = dict()

In [None]:
#Loading Libraries
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [None]:
# Hold out 20% of the rows as the test split.
# NOTE: an earlier run with random_state = 42 is recorded as giving results (81, 78, 85)
random_state_value = 99
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=random_state_value,
)

## Logistic Regression

In [None]:

# Fit a logistic regression (liblinear solver) on the training split
lrmodel = LogisticRegression(solver="liblinear").fit(X_train, y_train)

# column 1 of predict_proba is kept as the score; predict gives the hard labels
y_prob_lr = lrmodel.predict_proba(X_test)[:, 1]
y_pred_lr = lrmodel.predict(X_test)

In [None]:
# AUC on the test split, recorded in the shared score dictionary and printed
auc_lr = metrics.roc_auc_score(y_true=y_test, y_score=y_prob_lr)
model_auc_scores['Logistic Regression'] = auc_lr

print("AUC Score", round(auc_lr, 4))


AUC Score 0.6167


## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

# Fit a random forest on the training split. random_state reuses the seed of
# the train/test split so a Restart & Run All reproduces the same score.
rf_model = RandomForestClassifier(
    max_depth=10,
    min_samples_split=10,
    random_state=random_state_value,
)
rf_model.fit(X_train, y_train)

# Both the scores and the hard labels must come from rf_model (not lrmodel),
# so that y_pred_rf and y_prob_rf describe the same classifier.
y_prob_rf = rf_model.predict_proba(X_test)[:, 1]
y_pred_rf = rf_model.predict(X_test)

# compute the test AUC once, print it, and store it in the shared dictionary
auc_rf = roc_auc_score(y_test, y_prob_rf)
print("AUC Score",round(auc_rf,3))
model_auc_scores['Random Forest'] = auc_rf


AUC Score 0.616


## Neural Networks (using Keras)

In [None]:
#Loading Libraries

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential

For a neural network to train correctly, you should scale your data!

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# scaler to both the training and the test split
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Defining SIMPLE Keras Model: two hidden ReLU layers and a sigmoid output.
# The input width is read from the scaled training matrix instead of being
# hard-coded to 19, so the model still builds if the feature set changes.
n_features = X_train_scaled.shape[1]

kmodel = Sequential()
# explicit Input layer instead of passing input_shape= to the first Dense layer
kmodel.add(Input(shape=(n_features,)))
kmodel.add(Dense(12, activation="relu"))
kmodel.add(Dense(8, activation="relu"))
kmodel.add(Dense(1, activation="sigmoid"))

# Experiment log: ROC value noted next to the fit call that was tried
#ROC=0.6155 kmodel.fit(X_train_scaled,y_train,epochs = 10, batch_size = 64)
#ROC= 0.610 kmodel.fit(X_train_scaled,y_train,epochs = 50, batch_size = 256)
#ROC= 0.610 kmodel.fit(X_train_scaled,y_train,epochs = 50, batch_size = 124)
#ROC = 0.6142 kmodel.fit(X_train_scaled,y_train,epochs = 50, batch_size = 512)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the tracked metric
kmodel.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)


In [None]:
# Train on the scaled training split: 50 epochs, mini-batches of 256
kmodel.fit(x=X_train_scaled, y=y_train, batch_size=256, epochs=50)

Epoch 1/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7968 - loss: 0.4339
Epoch 2/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9507 - loss: 0.1963
Epoch 3/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9496 - loss: 0.1971
Epoch 4/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9494 - loss: 0.1976
Epoch 5/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9491 - loss: 0.1975
Epoch 6/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.9499 - loss: 0.1955
Epoch 7/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9502 - loss: 0.1937
Epoch 8/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9501 - loss: 0.1944
Epoch 9/50
[1m600/600[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x782053256390>

In [None]:
# Score the scaled test split with the network's sigmoid output
y_prob = kmodel.predict(X_test_scaled)

# AUC of those scores, stored under 'Simple NN' and printed
auc = roc_auc_score(y_true=y_test, y_score=y_prob)
model_auc_scores['Simple NN'] = auc
print("AUC Score", round(auc, 4))


[1m1199/1199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step
AUC Score 0.6113


In [None]:
## Now we make it more complex, with an extra layer, and Dropout
# The input width is read from the scaled training matrix instead of being
# hard-coded to 19, so the model still builds if the feature set changes.
n_features = X_train_scaled.shape[1]

kmodel = Sequential()
# explicit Input layer instead of passing input_shape= to the first Dense layer
kmodel.add(Input(shape=(n_features,)))
kmodel.add(Dense(12, activation="relu"))
kmodel.add(Dropout(0.3))  # dropout after the 12-unit layer
kmodel.add(Dense(8, activation="relu"))
kmodel.add(Dropout(0.3))  # dropout after the 8-unit layer
kmodel.add(Dense(6, activation="relu"))  # the extra hidden layer
kmodel.add(Dense(1, activation="sigmoid"))

# Experiment log: ROC value noted next to the fit call that was tried
# ROC = 0.618 kmodel.fit(X_train_scaled,y_train,epochs = 100, batch_size = 256)
# ROC = 0.617 kmodel.fit(X_train_scaled,y_train,epochs = 50, batch_size = 512)
# ROC = 0.617 kmodel.fit(X_train_scaled,y_train,epochs = 10, batch_size = 512)
# ROC = 0.6181 kmodel.fit(X_train_scaled,y_train,epochs = 50, batch_size = 256)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Configure training for the deeper network: Adam optimizer, binary cross-entropy loss, accuracy metric
kmodel.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)


In [None]:
# Train on the scaled training split: 20 epochs, mini-batches of 256
kmodel.fit(x=X_train_scaled, y=y_train, batch_size=256, epochs=20)

Epoch 1/20
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.7681 - loss: 0.4932
Epoch 2/20
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9506 - loss: 0.2251
Epoch 3/20
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9497 - loss: 0.2097
Epoch 4/20
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9492 - loss: 0.2045
Epoch 5/20
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9498 - loss: 0.1996
Epoch 6/20
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9492 - loss: 0.1996
Epoch 7/20
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9498 - loss: 0.1969
Epoch 8/20
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9493 - loss: 0.1977
Epoch 9/20
[1m600/600[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x7820513a3210>

In [None]:
# Score the scaled test split with the network's sigmoid output
y_prob = kmodel.predict(X_test_scaled)

# AUC of those scores, stored under 'Extra NN' and printed
auc = roc_auc_score(y_true=y_test, y_score=y_prob)
model_auc_scores['Extra NN'] = auc
print("AUC Score", round(auc, 4))

[1m1199/1199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
AUC Score 0.6192


# Results

In [None]:
# Report the split seed, then one line per model in the order the scores were stored
print("random state =", random_state_value)

for model_name in model_auc_scores:
    auc_score = model_auc_scores[model_name]
    print(f"{model_name}: AUC = {auc_score:.4f}")



random state = 99
Logistic Regression: AUC = 0.6167
Random Forest: AUC = 0.6159
Simple NN: AUC = 0.6113
Extra NN: AUC = 0.6192


In [None]:
# Print the layer-by-layer summary of the most recently defined kmodel
kmodel.summary()