In [65]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

#  Import and read the charity_data.csv.
import pandas as pd
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
application_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [66]:
# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.
df = application_df.drop(columns=['EIN', 'NAME', 'SPECIAL_CONSIDERATIONS'])

In [67]:
val_c = df['APPLICATION_TYPE'].value_counts()
print(val_c)

APPLICATION_TYPE
T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: count, dtype: int64


In [68]:

# Choose a cutoff value and create a list of application types to be replaced
# use the variable name `application_types_to_replace`
cutoff_value = 500
application_types_to_replace = val_c[val_c < cutoff_value].index.tolist()

# Replace in dataframe
for app in application_types_to_replace:
    df['APPLICATION_TYPE'] = df['APPLICATION_TYPE'].replace(app,"Other")

# Check to make sure replacement was successful
print(df['APPLICATION_TYPE'].value_counts())

APPLICATION_TYPE
T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: count, dtype: int64


In [69]:
print(df['APPLICATION_TYPE'].unique())

['T10' 'T3' 'T5' 'T7' 'T4' 'T6' 'Other' 'T19' 'T8']


In [70]:
app_map = {
    'T10': 0,
    'T3': 1,
    'T5': 2,
    'T7': 3,
    'T4': 4,
    'T6': 5,
    'Other': 6,
    'T19': 7,
    'T8': 8
}
df['APPLICATION_TYPE'] = df['APPLICATION_TYPE'].map(app_map)

print(df)

       APPLICATION_TYPE       AFFILIATION CLASSIFICATION      USE_CASE  \
0                     0       Independent          C1000    ProductDev   
1                     1       Independent          C2000  Preservation   
2                     2  CompanySponsored          C3000    ProductDev   
3                     1  CompanySponsored          C2000  Preservation   
4                     1       Independent          C1000     Heathcare   
...                 ...               ...            ...           ...   
34294                 4       Independent          C1000    ProductDev   
34295                 4  CompanySponsored          C3000    ProductDev   
34296                 1  CompanySponsored          C2000  Preservation   
34297                 2       Independent          C3000    ProductDev   
34298                 1       Independent          C1000  Preservation   

       ORGANIZATION  STATUS     INCOME_AMT   ASK_AMT  IS_SUCCESSFUL  
0       Association       1              

In [71]:
print(df['APPLICATION_TYPE'].unique())

[0 1 2 3 4 5 6 7 8]


In [72]:
value_counts = df['CLASSIFICATION'].value_counts()
filtered_counts = value_counts[value_counts > 1]
print(filtered_counts)

CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
C7000      777
C1700      287
C4000      194
C5000      116
C1270      114
C2700      104
C2800       95
C7100       75
C1300       58
C1280       50
C1230       36
C1400       34
C7200       32
C2300       32
C1240       30
C8000       20
C7120       18
C1500       16
C1800       15
C6000       15
C1250       14
C8200       11
C1238       10
C1278       10
C1235        9
C1237        9
C7210        7
C2400        6
C1720        6
C4100        6
C1257        5
C1600        5
C1260        3
C2710        3
C0           3
C3200        2
C1234        2
C1246        2
C1267        2
C1256        2
Name: count, dtype: int64


In [73]:

# Choose a cutoff value and create a list of classifications to be replaced
# use the variable name `classifications_to_replace`
cutoff_value = 1000
classifications_to_replace = value_counts[value_counts < cutoff_value].index.tolist()

# Replace in dataframe
for cls in classifications_to_replace:
    df['CLASSIFICATION'] = df['CLASSIFICATION'].replace(cls,"Other")

# Check to make sure replacement was successful
print(df['CLASSIFICATION'].value_counts())

CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
Other     2261
C3000     1918
C2100     1883
Name: count, dtype: int64


In [74]:
print(df['CLASSIFICATION'].unique())

['C1000' 'C2000' 'C3000' 'C1200' 'Other' 'C2100']


In [75]:
class_map = {
    'C1000': 0,
    'C2000': 1,
    'C3000': 2,
    'C1200': 3,
    'Other': 4,
    'C2100': 5
}
df['CLASSIFICATION'] = df['CLASSIFICATION'].map(class_map)

print(df)

       APPLICATION_TYPE       AFFILIATION  CLASSIFICATION      USE_CASE  \
0                     0       Independent               0    ProductDev   
1                     1       Independent               1  Preservation   
2                     2  CompanySponsored               2    ProductDev   
3                     1  CompanySponsored               1  Preservation   
4                     1       Independent               0     Heathcare   
...                 ...               ...             ...           ...   
34294                 4       Independent               0    ProductDev   
34295                 4  CompanySponsored               2    ProductDev   
34296                 1  CompanySponsored               1  Preservation   
34297                 2       Independent               2    ProductDev   
34298                 1       Independent               0  Preservation   

       ORGANIZATION  STATUS     INCOME_AMT   ASK_AMT  IS_SUCCESSFUL  
0       Association       1  

In [76]:
print(df['CLASSIFICATION'].unique())

[0 1 2 3 4 5]


In [77]:
bins = [-float('inf'), 9999, 24999, 99999, 499999, 1000000, 5000000, 10000000, 50000000, float('inf')]
labels = ['0', '1-9999', '10000-24999', '25000-99999', '100000-499999', '1M-5M', '5M-10M', '10M-50M', '50M+']

# Create bins
df['binned'] = pd.cut(df['ASK_AMT'], bins=bins, labels=labels)

# Convert bin labels to numeric codes
df['binned_code'] = df['binned'].astype('category').cat.codes

print(df)

       APPLICATION_TYPE       AFFILIATION  CLASSIFICATION      USE_CASE  \
0                     0       Independent               0    ProductDev   
1                     1       Independent               1  Preservation   
2                     2  CompanySponsored               2    ProductDev   
3                     1  CompanySponsored               1  Preservation   
4                     1       Independent               0     Heathcare   
...                 ...               ...             ...           ...   
34294                 4       Independent               0    ProductDev   
34295                 4  CompanySponsored               2    ProductDev   
34296                 1  CompanySponsored               1  Preservation   
34297                 2       Independent               2    ProductDev   
34298                 1       Independent               0  Preservation   

       ORGANIZATION  STATUS     INCOME_AMT   ASK_AMT  IS_SUCCESSFUL  \
0       Association       1 

In [78]:
print(df['binned_code'].unique())

[0 3 2 6 8 4 5 1 7]


In [79]:
# Convert categorical data to numeric with `pd.get_dummies`
df_dummies = pd.get_dummies(df, columns=['AFFILIATION', 'USE_CASE', 'ORGANIZATION'])
df_dummies.head()

Unnamed: 0,APPLICATION_TYPE,CLASSIFICATION,STATUS,INCOME_AMT,ASK_AMT,IS_SUCCESSFUL,binned,binned_code,AFFILIATION_CompanySponsored,AFFILIATION_Family/Parent,...,AFFILIATION_Regional,USE_CASE_CommunityServ,USE_CASE_Heathcare,USE_CASE_Other,USE_CASE_Preservation,USE_CASE_ProductDev,ORGANIZATION_Association,ORGANIZATION_Co-operative,ORGANIZATION_Corporation,ORGANIZATION_Trust
0,0,0,1,0,5000,1,0,0,False,False,...,False,False,False,False,False,True,True,False,False,False
1,1,1,1,1-9999,108590,1,25000-99999,3,False,False,...,False,False,False,False,True,False,False,True,False,False
2,2,2,1,0,5000,0,0,0,True,False,...,False,False,False,False,False,True,True,False,False,False
3,1,1,1,10000-24999,6692,1,0,0,True,False,...,False,False,False,False,True,False,False,False,False,True
4,1,0,1,100000-499999,142590,1,25000-99999,3,False,False,...,False,False,True,False,False,False,False,False,False,True


In [80]:
df_dummies.columns

Index(['APPLICATION_TYPE', 'CLASSIFICATION', 'STATUS', 'INCOME_AMT', 'ASK_AMT',
       'IS_SUCCESSFUL', 'binned', 'binned_code',
       'AFFILIATION_CompanySponsored', 'AFFILIATION_Family/Parent',
       'AFFILIATION_Independent', 'AFFILIATION_National', 'AFFILIATION_Other',
       'AFFILIATION_Regional', 'USE_CASE_CommunityServ', 'USE_CASE_Heathcare',
       'USE_CASE_Other', 'USE_CASE_Preservation', 'USE_CASE_ProductDev',
       'ORGANIZATION_Association', 'ORGANIZATION_Co-operative',
       'ORGANIZATION_Corporation', 'ORGANIZATION_Trust'],
      dtype='object')

In [81]:
#Convert INCOME_AMT values into floats and display those values in df
# 0-> 0
# 1-9999 ->1
# 10000-24999 -> 2
# 1M-5M -> 3

income_map = {
    '0': 0,
    '1-9999': 1,
    '10000-24999': 2,
    '25000-99999': 3,
    '100000-499999':4,
    '1M-5M': 5,
    '5M-10M': 6,
    '10M-50M': 7,
    '50M+': 8
}
df_dummies['INCOME_AMT'] = df_dummies['INCOME_AMT'].map(income_map)

print(df_dummies)

       APPLICATION_TYPE  CLASSIFICATION  STATUS  INCOME_AMT   ASK_AMT  \
0                     0               0       1           0      5000   
1                     1               1       1           1    108590   
2                     2               2       1           0      5000   
3                     1               1       1           2      6692   
4                     1               0       1           4    142590   
...                 ...             ...     ...         ...       ...   
34294                 4               0       1           0      5000   
34295                 4               2       1           0      5000   
34296                 1               1       1           0      5000   
34297                 2               2       1           0      5000   
34298                 1               0       1           5  36500179   

       IS_SUCCESSFUL       binned  binned_code  AFFILIATION_CompanySponsored  \
0                  1            0          

In [82]:
print(df_dummies['INCOME_AMT'].unique())

[0 1 2 4 7 3 8 5 6]


In [83]:
print(df_dummies['binned_code'].unique())

[0 3 2 6 8 4 5 1 7]


In [173]:
# Split our preprocessed data into our features and target arrays
X = df_dummies[['APPLICATION_TYPE', 'CLASSIFICATION', 'STATUS', 'INCOME_AMT','binned_code',
       'AFFILIATION_CompanySponsored', 'AFFILIATION_Family/Parent',
       'AFFILIATION_Independent', 'AFFILIATION_National', 'AFFILIATION_Other',
       'AFFILIATION_Regional', 'USE_CASE_CommunityServ', 'USE_CASE_Heathcare',
       'USE_CASE_Other', 'USE_CASE_Preservation', 'USE_CASE_ProductDev',
       'ORGANIZATION_Association', 'ORGANIZATION_Co-operative',
       'ORGANIZATION_Corporation', 'ORGANIZATION_Trust']]

y = df_dummies['IS_SUCCESSFUL']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [174]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler on the training data
scaler.fit(X_train)

# Scale the training data
X_train_scaled = scaler.transform(X_train)

# Scale the test data using the same scaler
X_test_scaled = scaler.transform(X_test)

In [151]:
import numpy as np

# Check for NaN values
nan_in_X_train = np.any(np.isnan(X_train))
# Check for infinite values
inf_in_X_train = np.any(np.isinf(X_train))

print(f"NaN values in X_train: {nan_in_X_train}")
print(f"Infinity values in X_train: {inf_in_X_train}")


NaN values in X_train: False
Infinity values in X_train: False


In [106]:
print(df_dummies.dtypes)

APPLICATION_TYPE                   int64
CLASSIFICATION                     int64
STATUS                             int64
INCOME_AMT                         int64
ASK_AMT                            int64
IS_SUCCESSFUL                      int64
binned                          category
binned_code                         int8
AFFILIATION_CompanySponsored        bool
AFFILIATION_Family/Parent           bool
AFFILIATION_Independent             bool
AFFILIATION_National                bool
AFFILIATION_Other                   bool
AFFILIATION_Regional                bool
USE_CASE_CommunityServ              bool
USE_CASE_Heathcare                  bool
USE_CASE_Other                      bool
USE_CASE_Preservation               bool
USE_CASE_ProductDev                 bool
ORGANIZATION_Association            bool
ORGANIZATION_Co-operative           bool
ORGANIZATION_Corporation            bool
ORGANIZATION_Trust                  bool
dtype: object


In [117]:
print(df_dummies['binned_code'].unique())

[0 3 2 6 8 4 5 1 7]


In [175]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=80, activation="relu", input_dim=20))

nn.add(tf.keras.layers.Dense(units=64, activation="relu"))

nn.add(tf.keras.layers.Dense(units=40, activation="relu"))
# Second hidden layer
nn.add(tf.keras.layers.Dense(units=32, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [176]:
# Compile the model
nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [179]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7404 - loss: 0.5377
Epoch 2/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7407 - loss: 0.5330
Epoch 3/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7336 - loss: 0.5438
Epoch 4/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7372 - loss: 0.5406
Epoch 5/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7307 - loss: 0.5461
Epoch 6/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7349 - loss: 0.5406
Epoch 7/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7319 - loss: 0.5429
Epoch 8/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7373 - loss: 0.5380
Epoch 9/100
[1m804/804[0m [32

In [181]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - 1ms/step - accuracy: 0.7221 - loss: 0.5680
Loss: 0.5679894089698792, Accuracy: 0.7220991253852844


Check each column for usefulness in accuracy

In [30]:
# Group by 'APPLICATION_TYPE' and calculate counts
grouped = df.groupby('APPLICATION_TYPE').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   APPLICATION_TYPE  total  successful  percentage_successful
0                 0    528         464              87.878788
1                 1  27037       14388              53.215963
2                 2   1173         900              76.726343
3                 3    725         410              56.551724
4                 4   1542         471              30.544747
5                 5   1216         900              74.013158
6                 6    276         216              78.260870
7                 7   1065         201              18.873239
8                 8    737         311              42.198100
Mean percentage of successful organizations: 57.584770269177426


In [29]:
grouped = df.groupby('CLASSIFICATION').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   CLASSIFICATION  total  successful  percentage_successful
0               0  17326        9760              56.331525
1               1   6074        3231              53.193941
2               2   1918         930              48.488008
3               3   4837        2238              46.268348
4               4   2261        1676              74.126493
5               5   1883         426              22.623473
Mean percentage of successful organizations: 50.171964773429785


In [31]:
grouped = df.groupby('STATUS').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   STATUS  total  successful  percentage_successful
0       0      5           3              60.000000
1       1  34294       18258              53.239634
Mean percentage of successful organizations: 56.619816877587915


In [102]:
grouped = df_dummies.groupby('INCOME_AMT').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   INCOME_AMT  total  successful  percentage_successful
0           0  24388       12577              51.570444
1           1    728         553              75.961538
2           2    543         368              67.771639
3           3   3747        2135              56.978916
4           4   3374        1952              57.854179
5           5    955         445              46.596859
6           6    185          87              47.027027
7           7    240          97              40.416667
8           8    139          47              33.812950
Mean percentage of successful organizations: 53.11002438222491


In [33]:
grouped = df.groupby('binned_code').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   binned_code  total  successful  percentage_successful
0            0  25946       13653              52.620828
1            1   1135         661              58.237885
2            2   2685        1545              57.541899
3            3   2305        1339              58.091106
4            4    651         359              55.145929
5            5    940         439              46.702128
6            6    225          95              42.222222
7            7    258         112              43.410853
8            8    154          58              37.662338
Mean percentage of successful organizations: 50.181687629403775


In [38]:
grouped = df_dummies.groupby('AFFILIATION_CompanySponsored').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   AFFILIATION_CompanySponsored  total  successful  percentage_successful
0                         False  18594       13049              70.178552
1                          True  15705        5212              33.186883
Mean percentage of successful organizations: 51.68271768968823


In [39]:
grouped = df_dummies.groupby('AFFILIATION_Family/Parent').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   AFFILIATION_Family/Parent  total  successful  percentage_successful
0                      False  34235       18225              53.234993
1                       True     64          36              56.250000
Mean percentage of successful organizations: 54.742496713889295


In [40]:
grouped = df_dummies.groupby('AFFILIATION_Independent').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   AFFILIATION_Independent  total  successful  percentage_successful
0                    False  15819        5280              33.377584
1                     True  18480       12981              70.243506
Mean percentage of successful organizations: 51.81054520578984


In [41]:
grouped = df_dummies.groupby('AFFILIATION_National').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   AFFILIATION_National  total  successful  percentage_successful
0                 False  34266       18241              53.233526
1                  True     33          20              60.606061
Mean percentage of successful organizations: 56.919793275072564


In [42]:
grouped = df_dummies.groupby('AFFILIATION_Other').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   AFFILIATION_Other  total  successful  percentage_successful
0              False  34295       18257              53.235165
1               True      4           4             100.000000
Mean percentage of successful organizations: 76.61758273800845


In [43]:
grouped = df_dummies.groupby('AFFILIATION_Regional').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   AFFILIATION_Regional  total  successful  percentage_successful
0                 False  34286       18253              53.237473
1                  True     13           8              61.538462
Mean percentage of successful organizations: 57.38796727975985


In [44]:
grouped = df_dummies.groupby('USE_CASE_CommunityServ').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   USE_CASE_CommunityServ  total  successful  percentage_successful
0                   False  33915       18127              53.448327
1                    True    384         134              34.895833
Mean percentage of successful organizations: 44.17208001621701


In [45]:
grouped = df_dummies.groupby('USE_CASE_Heathcare').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   USE_CASE_Heathcare  total  successful  percentage_successful
0               False  34153       18198              53.283753
1                True    146          63              43.150685
Mean percentage of successful organizations: 48.21721872845363


In [46]:
grouped = df_dummies.groupby('USE_CASE_Other').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   USE_CASE_Other  total  successful  percentage_successful
0           False  34296       18258              53.236529
1            True      3           3             100.000000
Mean percentage of successful organizations: 76.6182645206438


In [47]:
grouped = df_dummies.groupby('USE_CASE_Preservation').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   USE_CASE_Preservation  total  successful  percentage_successful
0                  False   6204        3144              50.676983
1                   True  28095       15117              53.806727
Mean percentage of successful organizations: 52.24185488376512


In [48]:
grouped = df_dummies.groupby('USE_CASE_ProductDev').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   USE_CASE_ProductDev  total  successful  percentage_successful
0                False  28628       15317              53.503563
1                 True   5671        2944              51.913243
Mean percentage of successful organizations: 52.70840287984331


In [49]:
grouped = df_dummies.groupby('ORGANIZATION_Association').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   ORGANIZATION_Association  total  successful  percentage_successful
0                     False  24044       14208              59.091665
1                      True  10255        4053              39.522184
Mean percentage of successful organizations: 49.306924790330356


In [50]:
grouped = df_dummies.groupby('ORGANIZATION_Co-operative').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   ORGANIZATION_Co-operative  total  successful  percentage_successful
0                      False  33813       17894              52.920474
1                       True    486         367              75.514403
Mean percentage of successful organizations: 64.21743883297133


In [51]:
grouped = df_dummies.groupby('ORGANIZATION_Corporation').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   ORGANIZATION_Corporation  total  successful  percentage_successful
0                     False  34256       18228              53.211116
1                      True     43          33              76.744186
Mean percentage of successful organizations: 64.97765117365283


In [52]:
grouped = df_dummies.groupby('ORGANIZATION_Trust').agg(
    total=('IS_SUCCESSFUL', 'size'),
    successful=('IS_SUCCESSFUL', 'sum')
)

# Calculate the percentage of successful organizations
grouped['percentage_successful'] = (grouped['successful'] / grouped['total']) * 100

# Reset index to make 'APPLICATION_TYPE' a column again
result = grouped.reset_index()

# Print the result
print(result)

mean_percentage_successful = result['percentage_successful'].mean()
print('Mean percentage of successful organizations:', mean_percentage_successful)

   ORGANIZATION_Trust  total  successful  percentage_successful
0               False  10784        4453              41.292656
1                True  23515       13808              58.719966
Mean percentage of successful organizations: 50.006310882756196


ACCURACY REVIEW:


X= ['APPLICATION_TYPE', 'CLASSIFICATION', 'INCOME_AMT',
       'AFFILIATION_CompanySponsored',
       'AFFILIATION_Independent', 'AFFILIATION_Other',
       'USE_CASE_CommunityServ', 'USE_CASE_Heathcare',
       'USE_CASE_Other', 'ORGANIZATION_Co-operative',
       'ORGANIZATION_Corporation', 'ORGANIZATION_Trust']

Loss: 0.5628382563591003, Accuracy: 0.7237317562103271

X= ['APPLICATION_TYPE', 'CLASSIFICATION', 'INCOME_AMT',
       'AFFILIATION_CompanySponsored',
       'AFFILIATION_Independent', 'AFFILIATION_Other',
       'USE_CASE_CommunityServ',
       'USE_CASE_Other', 'ORGANIZATION_Co-operative',
       'ORGANIZATION_Corporation']

Loss: 0.5675869584083557, Accuracy: 0.7244315147399902

X= ['APPLICATION_TYPE', 'CLASSIFICATION', 'INCOME_AMT', 'AFFILIATION_CompanySponsored', 'AFFILIATION_Independent', 'AFFILIATION_Other', 'USE_CASE_Other', 'ORGANIZATION_Co-operative', 'ORGANIZATION_Corporation']

Loss: 0.5666381120681763, Accuracy: 0.7232652902603149

X= ['APPLICATION_TYPE', 'AFFILIATION_Other', 'USE_CASE_Other']

Loss: 0.6674543619155884, Accuracy: 0.5693294405937195

X= ['APPLICATION_TYPE', 'CLASSIFICATION', 'INCOME_AMT',
       'AFFILIATION_CompanySponsored',
       'AFFILIATION_Independent', 'AFFILIATION_Other',
       'USE_CASE_Other', 'ORGANIZATION_Co-operative',
       'ORGANIZATION_Corporation']

Loss: 0.5657710433006287, Accuracy: 0.7232652902603149

X = 'APPLICATION_TYPE', 'CLASSIFICATION', 'INCOME_AMT',
       'binned_code', 'AFFILIATION_CompanySponsored',
       'AFFILIATION_Independent', 'AFFILIATION_Other',
       'USE_CASE_CommunityServ', 'USE_CASE_Other', 'ORGANIZATION_Association', 'ORGANIZATION_Co-operative',
       'ORGANIZATION_Corporation', 'ORGANIZATION_Trust'

Loss: 0.5617151856422424, Accuracy: 0.7244315147399902

X = 'APPLICATION_TYPE', 'CLASSIFICATION', 'STATUS', 'INCOME_AMT','binned_code',
       'AFFILIATION_CompanySponsored', 'AFFILIATION_Family/Parent',
       'AFFILIATION_Independent', 'AFFILIATION_National', 'AFFILIATION_Other',
       'AFFILIATION_Regional', 'USE_CASE_CommunityServ', 'USE_CASE_Heathcare',
       'USE_CASE_Other', 'USE_CASE_Preservation', 'USE_CASE_ProductDev',
       'ORGANIZATION_Association', 'ORGANIZATION_Co-operative',
       'ORGANIZATION_Corporation', 'ORGANIZATION_Trust'

Loss: 0.5679894089698792, Accuracy: 0.7220991253852844

In [182]:
# Export our model to HDF5 file
nn.save('AlphabetSoupCharity_Optimization.h5')

