### Deliverable 1: Preprocessing the Data for a Neural Network

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf
from pathlib import Path

# Read the cleaned travel dataset into a DataFrame.
# This CSV already contains cleaned values for MaritalStatus and Gender.
file_path = Path("../Resources/TravelCleanedSkiLearnImputer.csv")
vacay_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first rows to sanity-check the load
vacay_df.head()

Unnamed: 0,CustomerID,ProdTaken,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfPersonVisiting,NumberOfFollowups,ProductPitched,PreferredPropertyStar,MaritalStatus,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,NumberOfChildrenVisiting,Designation,MonthlyIncome
0,200000,1,41.0,Self Enquiry,3,6.0,Salaried,Female,3,3.0,Deluxe,3.0,Single,1.0,1,2,1,0.0,Manager,20993.0
1,200001,0,49.0,Company Invited,1,14.0,Salaried,Male,3,4.0,Deluxe,4.0,Single,2.0,0,3,1,2.0,Manager,20130.0
2,200002,1,37.0,Self Enquiry,1,8.0,Free Lancer,Male,3,4.0,Basic,3.0,Single,7.0,1,3,0,0.0,Executive,17090.0
3,200003,0,33.0,Company Invited,1,9.0,Salaried,Female,2,3.0,Basic,3.0,Single,2.0,1,5,1,1.0,Executive,17909.0
4,200004,0,37.622265,Self Enquiry,1,8.0,Small Business,Male,2,3.0,Basic,4.0,Single,1.0,0,5,1,0.0,Executive,18468.0


In [2]:
# Remove the customer identifier together with the other columns that are
# left out of the model (Designation and the two visitor-count columns).
vacay_df = vacay_df.drop(
    columns=[
        "CustomerID",
        "Designation",
        "NumberOfPersonVisiting",
        "NumberOfChildrenVisiting",
    ]
)
vacay_df.head()

Unnamed: 0,ProdTaken,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfFollowups,ProductPitched,PreferredPropertyStar,MaritalStatus,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,MonthlyIncome
0,1,41.0,Self Enquiry,3,6.0,Salaried,Female,3.0,Deluxe,3.0,Single,1.0,1,2,1,20993.0
1,0,49.0,Company Invited,1,14.0,Salaried,Male,4.0,Deluxe,4.0,Single,2.0,0,3,1,20130.0
2,1,37.0,Self Enquiry,1,8.0,Free Lancer,Male,4.0,Basic,3.0,Single,7.0,1,3,0,17090.0
3,0,33.0,Company Invited,1,9.0,Salaried,Female,3.0,Basic,3.0,Single,2.0,1,5,1,17909.0
4,0,37.622265,Self Enquiry,1,8.0,Small Business,Male,3.0,Basic,4.0,Single,1.0,0,5,1,18468.0


In [3]:
# Count the distinct values per column (missing values are not counted).
vacay_df.nunique(axis=0, dropna=True)

ProdTaken                    2
Age                         45
TypeofContact                2
CityTier                     3
DurationOfPitch             35
Occupation                   4
Gender                       2
NumberOfFollowups            7
ProductPitched               5
PreferredPropertyStar        4
MaritalStatus                2
NumberOfTrips               13
Passport                     2
PitchSatisfactionScore       5
OwnCar                       2
MonthlyIncome             2476
dtype: int64

In [4]:
# Collect the names of every column stored with the "object" dtype;
# these are the categorical columns that get one-hot encoded.
vacay_cat = [
    column_name
    for column_name, column_dtype in vacay_df.dtypes.items()
    if column_dtype == "object"
]
vacay_cat

['TypeofContact', 'Occupation', 'Gender', 'ProductPitched', 'MaritalStatus']

In [5]:
# Create a OneHotEncoder instance that returns a dense array.
# Newer scikit-learn releases name the flag `sparse_output`; older ones only
# accept `sparse`, so fall back when the new keyword is rejected.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encoded_values = enc.fit_transform(vacay_df[vacay_cat])

# Resolve the encoded variable names. `get_feature_names_out` is the current
# API; `get_feature_names` is kept as a fallback for older scikit-learn.
if hasattr(enc, "get_feature_names_out"):
    encoded_names = enc.get_feature_names_out(vacay_cat)
else:
    encoded_names = enc.get_feature_names(vacay_cat)

# Build the encoded frame on the same row index as vacay_df so that a later
# index-based merge lines the rows up correctly.
encode_df = pd.DataFrame(encoded_values, columns=encoded_names, index=vacay_df.index)
encode_df.head()

Unnamed: 0,TypeofContact_Company Invited,TypeofContact_Self Enquiry,Occupation_Free Lancer,Occupation_Large Business,Occupation_Salaried,Occupation_Small Business,Gender_Female,Gender_Male,ProductPitched_Basic,ProductPitched_Deluxe,ProductPitched_King,ProductPitched_Standard,ProductPitched_Super Deluxe,MaritalStatus_Married,MaritalStatus_Single
0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
1,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
3,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


In [6]:
# Merge one-hot encoded features (rows are matched on the index) and drop the
# original categorical columns.
vacay_df = vacay_df.merge(encode_df, left_index=True, right_index=True)

# Use the `columns=` keyword: passing the axis positionally (`drop(labels, 1)`)
# emits a pandas FutureWarning and is rejected by pandas 2.x.
vacay_df = vacay_df.drop(columns=vacay_cat)
vacay_df.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,ProdTaken,Age,CityTier,DurationOfPitch,NumberOfFollowups,PreferredPropertyStar,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,...,Occupation_Small Business,Gender_Female,Gender_Male,ProductPitched_Basic,ProductPitched_Deluxe,ProductPitched_King,ProductPitched_Standard,ProductPitched_Super Deluxe,MaritalStatus_Married,MaritalStatus_Single
0,1,41.0,3,6.0,3.0,3.0,1.0,1,2,1,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
1,0,49.0,1,14.0,4.0,4.0,2.0,0,3,1,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,1,37.0,1,8.0,4.0,3.0,7.0,1,3,0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0,33.0,1,9.0,3.0,3.0,2.0,1,5,1,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0,37.622265,1,8.0,3.0,4.0,1.0,0,5,1,...,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


In [7]:
# Split our preprocessed data into our features and target arrays.
# ProdTaken is the target; every remaining column is a feature.
y = vacay_df["ProdTaken"].values
# Use the `columns=` keyword: passing the axis positionally (`drop(labels, 1)`)
# emits a pandas FutureWarning and is rejected by pandas 2.x.
X = vacay_df.drop(columns=["ProdTaken"]).values

# Split the preprocessed data into a training and testing dataset
# (fixed random_state so the split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

  This is separate from the ipykernel package so we can avoid doing imports until


In [8]:
# Fit a StandardScaler on the training features only, so the test split
# is scaled with statistics learned from the training split.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

### Deliverable 2: Compile, Train and Evaluate the Model

In [9]:
# Model dimensions: one input per feature column, plus the node counts of
# the two hidden layers.
number_input_features = X_train.shape[1]
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30

# Deep neural net: two ReLU hidden layers feeding a single sigmoid output unit.
nn = tf.keras.models.Sequential(
    [
        # First hidden layer (also declares the input width)
        tf.keras.layers.Dense(
            units=hidden_nodes_layer1,
            input_dim=number_input_features,
            activation="relu",
        ),
        # Second hidden layer
        tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
        # Output layer
        tf.keras.layers.Dense(units=1, activation="sigmoid"),
    ]
)

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 80)                2080      
                                                                 
 dense_1 (Dense)             (None, 30)                2430      
                                                                 
 dense_2 (Dense)             (None, 1)                 31        
                                                                 
Total params: 4,541
Trainable params: 4,541
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Import checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the checkpoint path and filenames.
# NOTE: currently disabled. While these lines stay commented out, no
# checkpoints/ directory is created and `checkpoint_path` is not defined.
#os.makedirs("checkpoints/", exist_ok=True)
#checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

In [11]:
# Compile the model for binary classification
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Checkpoint callback (disabled): while this stays commented out,
# `cp_callback` is not defined by this notebook.
#cp_callback = ModelCheckpoint(
    #filepath=checkpoint_path,
    #verbose=1,
    #save_weights_only=True,
    #save_freq = 'epoch',
    #period = 5)

TypeError: __init__() missing 1 required positional argument: 'filepath'

In [None]:
# Train the model.
# No callbacks are passed: the ModelCheckpoint setup in this notebook is
# commented out, so `cp_callback` is never defined and referencing it here
# would raise a NameError on a fresh kernel.
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

In [None]:
# Score the trained model on the scaled test split and report both metrics
model_loss, model_accuracy = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Export our model to an HDF5 file (currently disabled)
#nn.save("AlphabetSoupCharity.h5")