# Venture Funding with Deep Learning

In [None]:
# Imports
import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder

---

## Prepare the data to be used on a neural network model

In [None]:
# Upload applicants_data.csv through the Colab file picker.
# The google.colab package only exists inside a Colab runtime, so when the
# notebook runs elsewhere the upload is skipped and applicants_data.csv is
# expected to already be present in the working directory.
try:
    from google.colab import files
except ImportError:
    csv_file = None
else:
    csv_file = files.upload()

In [None]:
# Load applicants_data.csv (looked up relative to the current working
# directory) into a Pandas DataFrame
csv_path = Path('applicants_data.csv')
applicant_data_df = pd.read_csv(csv_path)

# Preview the first five rows
applicant_data_df.head(5)


In [None]:
# Inspect the dtype of every column; the object-typed columns are the ones
# collected as categorical variables further down
applicant_data_df.dtypes


In [None]:
# Remove the 'EIN' and 'NAME' columns from the DataFrame
applicant_data_df = applicant_data_df.drop(['EIN', 'NAME'], axis="columns")

# Preview the remaining columns
applicant_data_df.head(5)


In [None]:
# Collect the names of all object-typed columns as the categorical variables
column_dtypes = applicant_data_df.dtypes
is_object_column = column_dtypes == "object"
categorical_variables = column_dtypes[is_object_column].index.tolist()

# Show the resulting list
categorical_variables


In [None]:
# Create a OneHotEncoder instance that returns a dense array.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4
# removed the old spelling, so try the current name first and fall back to
# the legacy keyword on older releases.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)


In [None]:
# Fit the encoder on the categorical columns and one-hot encode them
categorical_frame = applicant_data_df[categorical_variables]
encoded_data = enc.fit_transform(categorical_frame)


In [None]:
# Look up the generated column names for the encoded variables.
# get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out()
# is its replacement, with a fallback kept for older releases.
if hasattr(enc, "get_feature_names_out"):
    encoded_columns = enc.get_feature_names_out(categorical_variables)
else:
    encoded_columns = enc.get_feature_names(categorical_variables)

# Create a DataFrame with the encoded variables. Reusing the source index keeps
# the rows aligned when this frame is concatenated with the numerical columns.
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoded_columns,
    index=applicant_data_df.index,
)

# Review the DataFrame
encoded_df.head()


In [None]:
# Keep only the numerical columns by discarding every categorical column
numerical_variables_df = applicant_data_df.drop(categorical_variables, axis="columns")

# Preview the numerical columns
numerical_variables_df.head(5)

In [None]:
# Place the numerical columns side by side with the one-hot encoded columns
frames_to_join = [numerical_variables_df, encoded_df]
encoded_df = pd.concat(frames_to_join, axis="columns")

# Preview the combined DataFrame
encoded_df.head(5)


In [None]:
# The target set y is the IS_SUCCESSFUL column
target_column = 'IS_SUCCESSFUL'
y = encoded_df[target_column]

# Show the first five target values
y.head(5)


In [None]:
# The features set X is every column except IS_SUCCESSFUL
X = encoded_df.drop('IS_SUCCESSFUL', axis="columns")

# Preview the features DataFrame
X.head(5)


In [None]:
# Split the preprocessed data into training and testing datasets;
# random_state=1 keeps the split reproducible between runs
split_parts = train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = split_parts


### Step 7: Use scikit-learn's `StandardScaler` to scale the features data.

In [None]:
# Build the StandardScaler and fit it on the training features only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both the training and the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# The number of model inputs equals the number of feature columns
number_input_features = len(X_train.columns)

# Show the number of features
number_input_features


In [None]:
# Number of neurons in the output layer: a single unit, which the output
# layer below pairs with a sigmoid activation
number_output_neurons = 1

In [None]:
# First hidden layer size: floor of the mean of the input and output widths
hidden_nodes_layer1 = sum((number_input_features, number_output_neurons)) // 2

# Show the number of hidden nodes in the first layer
hidden_nodes_layer1


In [None]:
# Second hidden layer size: floor of the mean of the first hidden layer
# width and the output width
hidden_nodes_layer2 = sum((hidden_nodes_layer1, number_output_neurons)) // 2

# Show the number of hidden nodes in the second layer
hidden_nodes_layer2


In [None]:
# Create the empty Sequential model; layers are added to it in the cells below
nn = Sequential()


In [None]:
# First hidden layer: ReLU units fed by every input feature
first_hidden_layer = Dense(
    units=hidden_nodes_layer1,
    activation="relu",
    input_dim=number_input_features,
)
nn.add(first_hidden_layer)


In [None]:
# Second hidden layer: ReLU units
second_hidden_layer = Dense(
    units=hidden_nodes_layer2,
    activation="relu",
)
nn.add(second_hidden_layer)


In [None]:
# Output layer: number_output_neurons unit(s) with a sigmoid activation
output_layer = Dense(
    units=number_output_neurons,
    activation="sigmoid",
)
nn.add(output_layer)


In [None]:
# Print the layer-by-layer summary of the Sequential model
nn.summary()


In [None]:
# Compile the model: Adam optimizer, binary cross-entropy loss, accuracy metric
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)


In [None]:
# Train the model on the scaled training data for 50 epochs
fit_model = nn.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=50,
)


In [None]:
# Score the trained model on the scaled test data
test_metrics = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss, model_accuracy = test_metrics

# Report the loss and accuracy
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Set the model's file path
# NOTE(review): this cell writes to ../Resources while the alternative models
# are saved under ./Resources -- confirm which folder is intended.
file_path = Path("../Resources/AlphabetSoup.h5")

# Make sure the destination folder exists before saving
file_path.parent.mkdir(parents=True, exist_ok=True)

# Export the model to an HDF5 file
nn.save(file_path)

---

## Optimize the neural network model


### Alternative Model 1

In [None]:
# The number of model inputs equals the number of feature columns
number_input_features_A1 = len(X_train.columns)

# Show the number of features
number_input_features_A1

In [None]:
# Number of neurons in the output layer of Alternative Model 1: a single unit
number_output_neurons_A1 = 1

In [None]:
# First hidden layer size: floor of the mean of the input and output widths
hidden_nodes_layer1_A1 = sum((number_input_features_A1, number_output_neurons_A1)) // 2

# Show the number of hidden nodes in the first layer
hidden_nodes_layer1_A1

In [None]:
# Second hidden layer size: floor of the mean of the first hidden layer
# width and the output width
hidden_nodes_layer2_A1 = sum((hidden_nodes_layer1_A1, number_output_neurons_A1)) // 2

# Show the number of hidden nodes in the second layer
hidden_nodes_layer2_A1

In [None]:
# Third hidden layer size: floor of the mean of the second hidden layer
# width and the output width
hidden_nodes_layer3_A1 = sum((hidden_nodes_layer2_A1, number_output_neurons_A1)) // 2

# Show the number of hidden nodes in the third layer
hidden_nodes_layer3_A1

In [None]:
# Create the empty Sequential model for Alternative Model 1 (three hidden layers)
nn_A1 = Sequential()

In [None]:
# Layer stack for Alternative Model 1, in the order the layers are added:
# three ReLU hidden layers followed by the sigmoid output layer
layers_A1 = [
    Dense(units=hidden_nodes_layer1_A1, input_dim=number_input_features_A1, activation='relu'),
    Dense(units=hidden_nodes_layer2_A1, activation='relu'),
    Dense(units=hidden_nodes_layer3_A1, activation='relu'),
    Dense(units=number_output_neurons_A1, activation='sigmoid'),
]
for layer_A1 in layers_A1:
    nn_A1.add(layer_A1)

# Print the structure of the model
nn_A1.summary()

In [None]:
# Compile the model: Adam optimizer, binary cross-entropy loss, accuracy metric
nn_A1.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train Alternative Model 1 on the scaled training data for 50 epochs
fit_model_A1 = nn_A1.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=50,
)


In [None]:
# Score Alternative Model 1 on the scaled test data
test_metrics_A1 = nn_A1.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss_A1, model_accuracy_A1 = test_metrics_A1

# Report the loss and accuracy
print(f"Loss: {model_loss_A1}, Accuracy: {model_accuracy_A1}")

### Alternative Model 2

In [None]:
# Remove the STATUS and SPECIAL_CONSIDERATIONS columns from both the
# training and the testing features
dropped_columns_A2 = ['STATUS', 'SPECIAL_CONSIDERATIONS_N', 'SPECIAL_CONSIDERATIONS_Y']
X_train_A2 = X_train.drop(dropped_columns_A2, axis="columns")
X_test_A2 = X_test.drop(dropped_columns_A2, axis="columns")


In [None]:
# Fit a fresh StandardScaler on the reduced training features, then apply it
# to both the training and the testing features
X_scaler_A2 = StandardScaler().fit(X_train_A2)
X_train_scaled_A2, X_test_scaled_A2 = (
    X_scaler_A2.transform(features) for features in (X_train_A2, X_test_A2)
)

In [None]:
# The number of model inputs equals the number of remaining feature columns
number_input_features_A2 = len(X_train_A2.columns)

# Show the number of features
number_input_features_A2

In [None]:
# Number of neurons in the output layer of Alternative Model 2: a single unit
number_output_neurons_A2 = 1

In [None]:
# First hidden layer size: 50% of the input width, truncated to an integer
hidden_nodes_layer1_A2 = int(0.50 * number_input_features_A2)

# Show the number of hidden nodes in the first layer
hidden_nodes_layer1_A2

In [None]:
# Second hidden layer size: 30% of the input width, truncated to an integer
hidden_nodes_layer2_A2 = int(0.30 * number_input_features_A2)

# Show the number of hidden nodes in the second layer
hidden_nodes_layer2_A2

In [None]:
# Third hidden layer size: 20% of the input width, truncated to an integer
hidden_nodes_layer3_A2 = int(0.20 * number_input_features_A2)

# Show the number of hidden nodes in the third layer
hidden_nodes_layer3_A2

In [None]:
# Create the empty Sequential model for Alternative Model 2 (reduced feature set)
nn_A2 = Sequential()

In [None]:
# Layer stack for Alternative Model 2, in the order the layers are added:
# three ReLU hidden layers followed by the sigmoid output layer
layers_A2 = [
    Dense(units=hidden_nodes_layer1_A2, input_dim=number_input_features_A2, activation='relu'),
    Dense(units=hidden_nodes_layer2_A2, activation='relu'),
    Dense(units=hidden_nodes_layer3_A2, activation='relu'),
    Dense(units=number_output_neurons_A2, activation='sigmoid'),
]
for layer_A2 in layers_A2:
    nn_A2.add(layer_A2)

# Print the structure of the model
nn_A2.summary()

In [None]:
# Compile the model: Adam optimizer, binary cross-entropy loss, accuracy metric
nn_A2.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train Alternative Model 2 on its own scaled training data for 50 epochs
fit_model_A2 = nn_A2.fit(
    x=X_train_scaled_A2,
    y=y_train,
    epochs=50,
)

In [None]:
# Score Alternative Model 2 on its own scaled test data
test_metrics_A2 = nn_A2.evaluate(x=X_test_scaled_A2, y=y_test, verbose=2)
model_loss_A2, model_accuracy_A2 = test_metrics_A2

# Report the loss and accuracy
print(f"Loss: {model_loss_A2}, Accuracy: {model_accuracy_A2}")

### Alternative Model 3

In [None]:
# The number of model inputs equals the number of feature columns
number_input_features_A3 = len(X_train.columns)

# Show the number of features
number_input_features_A3

In [None]:
# Number of neurons in the output layer of Alternative Model 3: a single unit
number_output_neurons_A3 = 1

In [None]:
# First hidden layer size: 60% of the input width, truncated to an integer
hidden_nodes_layer1_A3 = int(0.60 * number_input_features_A3)

# Show the number of hidden nodes in the first layer
hidden_nodes_layer1_A3

In [None]:
# Second hidden layer size: 20% of the input width, truncated to an integer
hidden_nodes_layer2_A3 = int(0.20 * number_input_features_A3)

# Show the number of hidden nodes in the second layer
hidden_nodes_layer2_A3

In [None]:
# Create the empty Sequential model for Alternative Model 3 (two hidden layers)
nn_A3 = Sequential()

In [None]:
# Layer stack for Alternative Model 3, in the order the layers are added:
# two ReLU hidden layers followed by the sigmoid output layer
layers_A3 = [
    Dense(units=hidden_nodes_layer1_A3, input_dim=number_input_features_A3, activation='relu'),
    Dense(units=hidden_nodes_layer2_A3, activation='relu'),
    Dense(units=number_output_neurons_A3, activation='sigmoid'),
]
for layer_A3 in layers_A3:
    nn_A3.add(layer_A3)

# Print the structure of the model
nn_A3.summary()

In [None]:
# Compile the model: Adam optimizer, binary cross-entropy loss, accuracy metric
nn_A3.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train Alternative Model 3 on the scaled training data for 50 epochs
fit_model_A3 = nn_A3.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=50,
)


In [None]:
# Score Alternative Model 3 on the scaled test data
test_metrics_A3 = nn_A3.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss_A3, model_accuracy_A3 = test_metrics_A3

# Report the loss and accuracy
print(f"Loss: {model_loss_A3}, Accuracy: {model_accuracy_A3}")

### Compare the results.

In [None]:
print("Original Model Results")

# Re-score the original model on the scaled test data for the comparison
comparison_metrics = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss, model_accuracy = comparison_metrics

# Report the loss and accuracy
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
print("Alternative Model 1 Results")

# Re-score Alternative Model 1 on the scaled test data for the comparison
comparison_metrics_A1 = nn_A1.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss_A1, model_accuracy_A1 = comparison_metrics_A1

# Report the loss and accuracy
print(f"Loss: {model_loss_A1}, Accuracy: {model_accuracy_A1}")

In [None]:
print("Alternative Model 2 Results")

# Re-score Alternative Model 2 on its own scaled test data for the comparison
comparison_metrics_A2 = nn_A2.evaluate(x=X_test_scaled_A2, y=y_test, verbose=2)
model_loss_A2, model_accuracy_A2 = comparison_metrics_A2

# Report the loss and accuracy
print(f"Loss: {model_loss_A2}, Accuracy: {model_accuracy_A2}")

In [None]:
print("Alternative Model 3 Results")

# Re-score Alternative Model 3 on the scaled test data for the comparison
comparison_metrics_A3 = nn_A3.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss_A3, model_accuracy_A3 = comparison_metrics_A3

# Report the loss and accuracy
print(f"Loss: {model_loss_A3}, Accuracy: {model_accuracy_A3}")

### Step 3: Save each of your alternative models as an HDF5 file.


In [None]:
# Set the file path for the first alternative model
# NOTE(review): the original model is saved under ../Resources while this
# path uses ./Resources -- confirm which folder is intended.
file_path = Path("./Resources/Alternative_model_1.h5")
# Make sure the destination folder exists before saving
file_path.parent.mkdir(parents=True, exist_ok=True)
# Export the model to an HDF5 file
nn_A1.save(file_path)

In [None]:
# Set the file path for the second alternative model
file_path = Path("./Resources/Alternative_model_2.h5")
# Make sure the destination folder exists before saving
file_path.parent.mkdir(parents=True, exist_ok=True)
# Export the model to an HDF5 file
nn_A2.save(file_path)

In [None]:
# Set the file path for the third alternative model
file_path = Path("./Resources/Alternative_model_3.h5")
# Make sure the destination folder exists before saving
file_path.parent.mkdir(parents=True, exist_ok=True)
# Export the model to an HDF5 file
nn_A3.save(file_path)