<a href="https://colab.research.google.com/github/CassioSperb/Neural-Networks-and-Deep-Learning-Challenege/blob/main/deep_learning_challenge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# prompt: how to improve the accuracy of this code? should I do any of the following?
# Dropping more or fewer columns.
# Creating more bins for rare occurrences in columns.
# Increasing or decreasing the number of values for each bin.

!pip install keras_tuner
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import keras_tuner as kt
from tensorflow import keras
from kerastuner.tuners import RandomSearch, Hyperband
from kerastuner.engine.hyperparameters import HyperParameters

# Import our dependencies


# Import and read the charity_data.csv.
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")

# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.
application_df = application_df.drop(columns=['EIN', 'NAME'])

# Determine the number of unique values in each column.
unique_value_counts = application_df.nunique()


# More robust binning for APPLICATION_TYPE
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()
cutoff_value_app_type = 100 # Adjust this value
application_types_to_replace = list(application_type_counts[application_type_counts < cutoff_value_app_type].index)
for app in application_types_to_replace:
    application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(app,"Other")

# More robust binning for CLASSIFICATION
classification_counts = application_df['CLASSIFICATION'].value_counts()
cutoff_value_class = 500 # Adjust this value
classifications_to_replace = list(classification_counts[classification_counts < cutoff_value_class].index)

for cls in classifications_to_replace:
    application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(cls,"Other")

# Convert categorical data to numeric with `pd.get_dummies`
application_df = pd.get_dummies(application_df, drop_first=True)

# Split our preprocessed data into our features and target arrays
y = application_df['IS_SUCCESSFUL'].values
X = application_df.drop(columns=['IS_SUCCESSFUL']).values


# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Create a StandardScaler instances
scaler = StandardScaler()


# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

def create_model(hp):
    model = keras.Sequential()
    # Tune the number of units in the first Dense layer
    # Choose an optimal value between 32-100
    hp_units = hp.Int('units', min_value=32, max_value=100, step=32)
    model.add(keras.layers.Dense(units=hp_units, activation='relu', input_shape=(X_train_scaled.shape[1],)))
    # Tune the number of layers
    for i in range(hp.Int('num_layers', 1, 3)):  # Add 1 to 3 hidden layers
        model.add(keras.layers.Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=100, step=32),
                                    activation='relu'))
    model.add(keras.layers.Dropout(hp.Float('dropout', 0, 0.5, step=0.1))) # Add dropout
    model.add(keras.layers.Dense(1, activation='sigmoid'))  # Output layer for binary classification

    # Tune the learning rate for the optimizer
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Use Hyperband tuner for more efficient search
tuner = kt.Hyperband(
    create_model,
    objective='val_accuracy',
    max_epochs=50,  # Increased max_epochs for more thorough search
    factor=3,
    directory='my_dir',
    project_name='intro_to_kt'
)
tuner.search(X_train_scaled, y_train, epochs=20, validation_split=0.2)

best_hyper = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best Hyperparameters: {best_hyper.values}")

best_model = tuner.get_best_models(num_models=1)[0]

model_loss, model_accuracy = best_model.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Reloading Tuner from my_dir/intro_to_kt/tuner0.json
Best Hyperparameters: {'units': 96, 'num_layers': 3, 'units_0': 96, 'dropout': 0.0, 'learning_rate': 0.001, 'units_1': 96, 'units_2': 64, 'tuner/epochs': 50, 'tuner/initial_epoch': 17, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0049'}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


215/215 - 1s - 3ms/step - accuracy: 0.7273 - loss: 0.5583
Loss: 0.5583006143569946, Accuracy: 0.7272594571113586


In [7]:
# Export our model to HDF5 file
best_model.save("AlphabetSoupCharity.h5")

