In [2]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf

In [3]:
# Import our input dataset
charity_df = pd.read_csv('Charity_data.csv')
charity_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


# PREPROCESS 

In [None]:
# PREPROCESS NAME COLUMN

In [4]:
# Generate our categorical variable list
charity_cat = charity_df.dtypes[charity_df.dtypes == "object"].index.tolist()

In [5]:
# Check the number of unique values in each column
charity_df[charity_cat].nunique()

NAME                      19568
APPLICATION_TYPE             17
AFFILIATION                   6
CLASSIFICATION               71
USE_CASE                      5
ORGANIZATION                  4
INCOME_AMT                    9
SPECIAL_CONSIDERATIONS        2
dtype: int64

In [8]:
# Print out the NAME value counts
name_counts = charity_df.NAME.value_counts()
name_counts

PARENT BOOSTER USA INC                                                   1260
TOPS CLUB INC                                                             765
UNITED STATES BOWLING CONGRESS INC                                        700
WASHINGTON STATE UNIVERSITY                                               492
AMATEUR ATHLETIC UNION OF THE UNITED STATES INC                           408
                                                                         ... 
CHRISTOPHER XAVIER DICKERSON FOUNDATION                                     1
LLANO FIDDLE FEST                                                           1
SIERRA STREAMS INSTITUTE                                                    1
HONORING HIRING HELPING OUR HEROES OF PINAL                                 1
SAFE ENVIRONMENT THROUGH COMMUNITY UNITY FOR RESPONSIBLE EDUCATION IN       1
Name: NAME, Length: 19568, dtype: int64

In [9]:
# Determine which values to replace
replace_name = list(name_counts[name_counts < 100].index)

In [10]:
# Replace in DataFrame
for NAME in replace_name:
    charity_df.NAME = charity_df.NAME.replace(NAME,"Other")

In [11]:
# Check to make sure binning was successful
charity_df.NAME.value_counts()

Other                                                                 25987
PARENT BOOSTER USA INC                                                 1260
TOPS CLUB INC                                                           765
UNITED STATES BOWLING CONGRESS INC                                      700
WASHINGTON STATE UNIVERSITY                                             492
AMATEUR ATHLETIC UNION OF THE UNITED STATES INC                         408
PTA TEXAS CONGRESS                                                      368
SOROPTIMIST INTERNATIONAL OF THE AMERICAS INC                           331
ALPHA PHI SIGMA                                                         313
TOASTMASTERS INTERNATIONAL                                              293
MOST WORSHIPFUL STRINGER FREE AND ACCEPTED MASONS                       287
LITTLE LEAGUE BASEBALL INC                                              277
INTERNATIONAL ASSOCIATION OF LIONS CLUBS                                266
MOMS CLUB   

In [15]:
# Create the OneHotEncoder instance
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)

# Fit the encoder and produce encoded DataFrame
encode_df = pd.DataFrame(enc.fit_transform(charity_df.NAME.values.reshape(-1,1)))

# Rename encoded columns
encode_df.columns = enc.get_feature_names(['NAME'])
encode_df.head()

Unnamed: 0,NAME_ALPHA PHI SIGMA,NAME_AMATEUR ATHLETIC UNION OF THE UNITED STATES INC,NAME_AMERICAN ASSOCIATION OF UNIVERSITY WOMEN,NAME_CIVITAN INTERNATIONAL,NAME_DEMOLAY INTERNATIONAL,NAME_FARMERS EDUCATIONAL AND COOPERATIVE UNION OF AMERICA,NAME_HABITAT FOR HUMANITY INTERNATIONAL INC,NAME_HONOR SOCIETY OF PHI KAPPA PHI,NAME_INTERNATIONAL ASSOCIATION OF LIONS CLUBS,NAME_INTERNATIONAL ASSOCIATION OF SHEET METAL AIR RAIL & TRANSPORTATION,...,NAME_SOROPTIMIST INTERNATIONAL OF THE AMERICAS INC,NAME_TENNESSEE ORDER OF THE EASTERN STAR,NAME_THE UNITED STATES PONY CLUBS INC,NAME_TOASTMASTERS INTERNATIONAL,NAME_TOPS CLUB INC,NAME_UNITED STATES BOWLING CONGRESS INC,NAME_UNIVERSITY OF WYOMING,NAME_VETERANS OF FOREIGN WARS OF THE UNITED STATES AUXILIARY,NAME_WASHINGTON STATE GRANGE,NAME_WASHINGTON STATE UNIVERSITY
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
# Merge the two DataFrames together and drop the Country column
df =charity_df.merge(encode_df,left_index=True,right_index=True).drop("NAME",1)
df.head()

Unnamed: 0,EIN,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,...,NAME_SOROPTIMIST INTERNATIONAL OF THE AMERICAS INC,NAME_TENNESSEE ORDER OF THE EASTERN STAR,NAME_THE UNITED STATES PONY CLUBS INC,NAME_TOASTMASTERS INTERNATIONAL,NAME_TOPS CLUB INC,NAME_UNITED STATES BOWLING CONGRESS INC,NAME_UNIVERSITY OF WYOMING,NAME_VETERANS OF FOREIGN WARS OF THE UNITED STATES AUXILIARY,NAME_WASHINGTON STATE GRANGE,NAME_WASHINGTON STATE UNIVERSITY
0,10520599,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,10531628,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,10547893,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,10553066,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10556103,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# PREPROCESS CLASSIFICATION COLUMN

In [18]:
# Print out the CLASSIFICATION value counts
classification_counts = charity_df.CLASSIFICATION.value_counts()
classification_counts

C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C1820        1
C2500        1
C4500        1
C1580        1
C2170        1
Name: CLASSIFICATION, Length: 71, dtype: int64

In [19]:
# Determine which values to replace
replace_classification = list(classification_counts[classification_counts < 100].index)


In [29]:
# Replace in DataFrame
for CLASSIFICATION in replace_classification:
    df.CLASSIFICATION = df.CLASSIFICATION.replace(CLASSIFICATION,"Other")


In [30]:
# Check to make sure binning was successful

df.CLASSIFICATION.value_counts()

C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
C7000      777
Other      669
C1700      287
C4000      194
C5000      116
C1270      114
C2700      104
Name: CLASSIFICATION, dtype: int64

In [31]:
# Create the OneHotEncoder instance
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)

# Fit the encoder and produce encoded DataFrame
encode_cl_df = pd.DataFrame(enc.fit_transform(df.CLASSIFICATION.values.reshape(-1,1)))

# Rename encoded columns
encode_cl_df.columns = enc.get_feature_names(['CLASSIFICATION'])
encode_cl_df.head()

Unnamed: 0,CLASSIFICATION_C1000,CLASSIFICATION_C1200,CLASSIFICATION_C1270,CLASSIFICATION_C1700,CLASSIFICATION_C2000,CLASSIFICATION_C2100,CLASSIFICATION_C2700,CLASSIFICATION_C3000,CLASSIFICATION_C4000,CLASSIFICATION_C5000,CLASSIFICATION_C7000,CLASSIFICATION_Other
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
# Merge the two DataFrames together and drop the Country column
df2=df.merge(encode_cl_df,left_index=True,right_index=True).drop("CLASSIFICATION",1)
df2.head()


Unnamed: 0,EIN,APPLICATION_TYPE,AFFILIATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL,...,CLASSIFICATION_C1270,CLASSIFICATION_C1700,CLASSIFICATION_C2000,CLASSIFICATION_C2100,CLASSIFICATION_C2700,CLASSIFICATION_C3000,CLASSIFICATION_C4000,CLASSIFICATION_C5000,CLASSIFICATION_C7000,CLASSIFICATION_Other
0,10520599,T10,Independent,ProductDev,Association,1,0,N,5000,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,10531628,T3,Independent,Preservation,Co-operative,1,1-9999,N,108590,1,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,10547893,T5,CompanySponsored,ProductDev,Association,1,0,N,5000,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,10553066,T3,CompanySponsored,Preservation,Trust,1,10000-24999,N,6692,1,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10556103,T3,Independent,Heathcare,Trust,1,100000-499999,N,142590,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
df2.columns

Index(['EIN', 'APPLICATION_TYPE', 'AFFILIATION', 'USE_CASE', 'ORGANIZATION',
       'STATUS', 'INCOME_AMT', 'SPECIAL_CONSIDERATIONS', 'ASK_AMT',
       'IS_SUCCESSFUL', 'NAME_ALPHA PHI SIGMA',
       'NAME_AMATEUR ATHLETIC UNION OF THE UNITED STATES INC',
       'NAME_AMERICAN ASSOCIATION OF UNIVERSITY WOMEN',
       'NAME_CIVITAN INTERNATIONAL', 'NAME_DEMOLAY INTERNATIONAL',
       'NAME_FARMERS EDUCATIONAL AND COOPERATIVE UNION OF AMERICA',
       'NAME_HABITAT FOR HUMANITY INTERNATIONAL INC',
       'NAME_HONOR SOCIETY OF PHI KAPPA PHI',
       'NAME_INTERNATIONAL ASSOCIATION OF LIONS CLUBS',
       'NAME_INTERNATIONAL ASSOCIATION OF SHEET METAL AIR RAIL & TRANSPORTATION',
       'NAME_KNIGHTS OF COLUMBUS', 'NAME_LITTLE LEAGUE BASEBALL INC',
       'NAME_MOMS CLUB', 'NAME_MONTANA 4-H FOUNDATION INC',
       'NAME_MOST WORSHIPFUL STRINGER FREE AND ACCEPTED MASONS', 'NAME_Other',
       'NAME_PARENT BOOSTER USA INC', 'NAME_PTA TEXAS CONGRESS',
       'NAME_PTA UTAH CONGRESS', 'NAME_SE

In [None]:
# PREPROCESS APPLICATION TYPE COLUMN

In [37]:
# Print out the APPLICATION TYPE value counts
app_counts = charity_df.APPLICATION_TYPE.value_counts()
app_counts

T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T14        3
T25        3
T15        2
T29        2
T17        1
Name: APPLICATION_TYPE, dtype: int64

In [38]:
# Determine which values to replace
replace_app = list(app_counts[app_counts < 100].index)

In [43]:
# Replace in DataFrame
for APPLICATION_TYPE in replace_app:
    df2.APPLICATION_TYPE = df2.APPLICATION_TYPE.replace(APPLICATION_TYPE,"Other")

In [44]:
# Check to make sure binning was successful
df2.APPLICATION_TYPE.value_counts()

T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
T9         156
Other      120
Name: APPLICATION_TYPE, dtype: int64

In [45]:
# Create the OneHotEncoder instance
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)

# Fit the encoder and produce encoded DataFrame
encode_app_df = pd.DataFrame(enc.fit_transform(df2.APPLICATION_TYPE.values.reshape(-1,1)))

# Rename encoded columns
encode_app_df.columns = enc.get_feature_names(['APP_TYPE'])
encode_app_df.head()

Unnamed: 0,APP_TYPE_Other,APP_TYPE_T10,APP_TYPE_T19,APP_TYPE_T3,APP_TYPE_T4,APP_TYPE_T5,APP_TYPE_T6,APP_TYPE_T7,APP_TYPE_T8,APP_TYPE_T9
0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [49]:
# Merge the two DataFrames together and drop the Country column
df3=df2.merge(encode_app_df,left_index=True,right_index=True).drop("APPLICATION_TYPE",1)
df3.head()


Unnamed: 0,EIN,AFFILIATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL,NAME_ALPHA PHI SIGMA,...,APP_TYPE_Other,APP_TYPE_T10,APP_TYPE_T19,APP_TYPE_T3,APP_TYPE_T4,APP_TYPE_T5,APP_TYPE_T6,APP_TYPE_T7,APP_TYPE_T8,APP_TYPE_T9
0,10520599,Independent,ProductDev,Association,1,0,N,5000,1,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,10531628,Independent,Preservation,Co-operative,1,1-9999,N,108590,1,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,10547893,CompanySponsored,ProductDev,Association,1,0,N,5000,0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,10553066,CompanySponsored,Preservation,Trust,1,10000-24999,N,6692,1,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10556103,Independent,Heathcare,Trust,1,100000-499999,N,142590,1,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [50]:
df3.columns


Index(['EIN', 'AFFILIATION', 'USE_CASE', 'ORGANIZATION', 'STATUS',
       'INCOME_AMT', 'SPECIAL_CONSIDERATIONS', 'ASK_AMT', 'IS_SUCCESSFUL',
       'NAME_ALPHA PHI SIGMA',
       'NAME_AMATEUR ATHLETIC UNION OF THE UNITED STATES INC',
       'NAME_AMERICAN ASSOCIATION OF UNIVERSITY WOMEN',
       'NAME_CIVITAN INTERNATIONAL', 'NAME_DEMOLAY INTERNATIONAL',
       'NAME_FARMERS EDUCATIONAL AND COOPERATIVE UNION OF AMERICA',
       'NAME_HABITAT FOR HUMANITY INTERNATIONAL INC',
       'NAME_HONOR SOCIETY OF PHI KAPPA PHI',
       'NAME_INTERNATIONAL ASSOCIATION OF LIONS CLUBS',
       'NAME_INTERNATIONAL ASSOCIATION OF SHEET METAL AIR RAIL & TRANSPORTATION',
       'NAME_KNIGHTS OF COLUMBUS', 'NAME_LITTLE LEAGUE BASEBALL INC',
       'NAME_MOMS CLUB', 'NAME_MONTANA 4-H FOUNDATION INC',
       'NAME_MOST WORSHIPFUL STRINGER FREE AND ACCEPTED MASONS', 'NAME_Other',
       'NAME_PARENT BOOSTER USA INC', 'NAME_PTA TEXAS CONGRESS',
       'NAME_PTA UTAH CONGRESS', 'NAME_SERTOMA INC',
       '

In [None]:
# PREPROCESS rest of the COLUMNs

In [54]:
# Generate our categorical variable list
df3_cat = df3.dtypes[df3.dtypes == "object"].index.tolist()

In [55]:
# Check the number of unique values in each column
df3[df3_cat].nunique()

AFFILIATION               6
USE_CASE                  5
ORGANIZATION              4
INCOME_AMT                9
SPECIAL_CONSIDERATIONS    2
dtype: int64

In [57]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_final_df = pd.DataFrame(enc.fit_transform(df3[df3_cat]))

# Add the encoded variable names to the DataFrame
encode_final_df.columns = enc.get_feature_names(df3_cat)
encode_final_df.head()

Unnamed: 0,AFFILIATION_CompanySponsored,AFFILIATION_Family/Parent,AFFILIATION_Independent,AFFILIATION_National,AFFILIATION_Other,AFFILIATION_Regional,USE_CASE_CommunityServ,USE_CASE_Heathcare,USE_CASE_Other,USE_CASE_Preservation,...,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [60]:
# Merge one-hot encoded features and drop the originals
df3 = df3.merge(encode_final_df,left_index=True, right_index=True)
df3 = df3.drop(df3_cat,1)
df3.head()




Unnamed: 0,EIN,STATUS,ASK_AMT,IS_SUCCESSFUL,NAME_ALPHA PHI SIGMA,NAME_AMATEUR ATHLETIC UNION OF THE UNITED STATES INC,NAME_AMERICAN ASSOCIATION OF UNIVERSITY WOMEN,NAME_CIVITAN INTERNATIONAL,NAME_DEMOLAY INTERNATIONAL,NAME_FARMERS EDUCATIONAL AND COOPERATIVE UNION OF AMERICA,...,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,10520599,1,5000,1,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,10531628,1,108590,1,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,10547893,1,5000,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,10553066,1,6692,1,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,10556103,1,142590,1,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [62]:
df3.columns


Index(['EIN', 'STATUS', 'ASK_AMT', 'IS_SUCCESSFUL', 'NAME_ALPHA PHI SIGMA',
       'NAME_AMATEUR ATHLETIC UNION OF THE UNITED STATES INC',
       'NAME_AMERICAN ASSOCIATION OF UNIVERSITY WOMEN',
       'NAME_CIVITAN INTERNATIONAL', 'NAME_DEMOLAY INTERNATIONAL',
       'NAME_FARMERS EDUCATIONAL AND COOPERATIVE UNION OF AMERICA',
       'NAME_HABITAT FOR HUMANITY INTERNATIONAL INC',
       'NAME_HONOR SOCIETY OF PHI KAPPA PHI',
       'NAME_INTERNATIONAL ASSOCIATION OF LIONS CLUBS',
       'NAME_INTERNATIONAL ASSOCIATION OF SHEET METAL AIR RAIL & TRANSPORTATION',
       'NAME_KNIGHTS OF COLUMBUS', 'NAME_LITTLE LEAGUE BASEBALL INC',
       'NAME_MOMS CLUB', 'NAME_MONTANA 4-H FOUNDATION INC',
       'NAME_MOST WORSHIPFUL STRINGER FREE AND ACCEPTED MASONS', 'NAME_Other',
       'NAME_PARENT BOOSTER USA INC', 'NAME_PTA TEXAS CONGRESS',
       'NAME_PTA UTAH CONGRESS', 'NAME_SERTOMA INC',
       'NAME_SIGMA BETA DELTA INC',
       'NAME_SOROPTIMIST INTERNATIONAL OF THE AMERICAS INC',
       '

In [None]:
# Standardize data using Scikit_Learn's StandradScaler class

In [67]:
# Remove IS_SUCCESSFUL target from features data
y = df3.IS_SUCCESSFUL
X = df3.drop(columns=["IS_SUCCESSFUL"])


In [68]:
# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)


In [69]:
# Create a StandardScaler instance
scaler = StandardScaler()


In [70]:
# Fit the StandardScaler
X_scaler = scaler.fit(X_train)


In [71]:
# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# TRAIN and EVALUATE RANDOM FOREST CLASSIFIER

In [78]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score 

In [79]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

In [80]:
# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)


In [81]:
# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")


 Random forest predictive accuracy: 0.722


# TRAIN and EVALUATE DEEP LEARNING MODEL

In [82]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 =  24
hidden_nodes_layer2 = 12

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
8575/8575 - 0s - loss: 0.4929 - acc: 0.7558
Loss: 0.49288796165594206, Accuracy: 0.7558017373085022
