## 1. Import Modules and Preprocessing

In [142]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

# Additional dependencies: warning filter, evaluation report, and the KNN classifier
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

# Load charity_data.csv from its hosted location and preview the first rows.
url = 'https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv'
charity_data_df = pd.read_csv(filepath_or_buffer=url)
charity_data_df.head(n=5)

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [143]:
# Drop the non-beneficial ID column 'EIN'.
# Re-assigning the result (instead of inplace=True) together with errors='ignore'
# keeps this cell idempotent: running it again after 'EIN' has already been
# removed no longer raises a KeyError.
charity_data_df = charity_data_df.drop(columns=['EIN'], errors='ignore')
charity_data_df

Unnamed: 0,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1
...,...,...,...,...,...,...,...,...,...,...,...
34294,THE LIONS CLUB OF HONOLULU KAMEHAMEHA,T4,Independent,C1000,ProductDev,Association,1,0,N,5000,0
34295,INTERNATIONAL ASSOCIATION OF LIONS CLUBS,T4,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
34296,PTA HAWAII CONGRESS,T3,CompanySponsored,C2000,Preservation,Association,1,0,N,5000,0
34297,AMERICAN FEDERATION OF GOVERNMENT EMPLOYEES LO...,T5,Independent,C3000,ProductDev,Association,1,0,N,5000,1


In [144]:
# Report how many distinct values each column holds.
# dropna=False counts a missing value as its own entry, matching len(Series.unique()).
for column_name, distinct_count in charity_data_df.nunique(dropna=False).items():
    print(column_name, distinct_count)

NAME 19568
APPLICATION_TYPE 17
AFFILIATION 6
CLASSIFICATION 71
USE_CASE 5
ORGANIZATION 4
STATUS 2
INCOME_AMT 9
SPECIAL_CONSIDERATIONS 2
ASK_AMT 8747
IS_SUCCESSFUL 2


## 2. Separate the features x from the target y

In [145]:
# 'IS_SUCCESSFUL' is the label (y); every remaining column is a feature (X).
X = charity_data_df.drop('IS_SUCCESSFUL', axis=1)
y = charity_data_df['IS_SUCCESSFUL']

# Show the first rows of the feature frame
X.head()

Unnamed: 0,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT
0,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000
1,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590
2,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000
3,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692
4,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590


In [146]:
# Preview the first five target labels
y.head(5)

0    1
1    1
2    0
3    1
4    1
Name: IS_SUCCESSFUL, dtype: int64

## 3. Encode the categorical variables from the features data using get_dummies

In [147]:
# One-hot encode the categorical columns; STATUS and ASK_AMT are carried over as-is.
# NOTE(review): NAME has 19,568 distinct values (see the unique-count cell), so this
# creates one indicator column per organisation name - confirm that is intended.
X = pd.get_dummies(data=X)
X.head()



Unnamed: 0,STATUS,ASK_AMT,NAME_1 DAY RANCH RESCUE AND RURAL OKLAHOMA ANIMAL RESOURCE INC,NAME_100 BLACK MEN OF AMERICA,NAME_100 BLACK MEN OF MEMPHIS INC,NAME_100 BLACK MEN OF WEST GEORGIA INC,NAME_1150 WEBSTER STREET INC,NAME_116TH CAVALRY REGIMENT CHAPTER OF THE US CAVALRY & ARMOR ASSOCIATION,NAME_13TH BOMB SQUADRON ASSOCIATION,NAME_146TH ALUMNI ASSOCIATION,...,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1,5000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,1,108590,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
2,1,5000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,1,6692,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0
4,1,142590,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0


## 4. Separate the data into training and testing subsets

In [148]:
# Hold out a test subset; random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

## 5. Scale the data using StandardScaler

In [149]:
# Create the scaler
scaler = StandardScaler()

# Learn the scaling statistics from the training data only and scale it in one step
X_train_scaled = scaler.fit_transform(X_train)

# fit() returns the scaler itself, so X_scaler is simply another name for the fitted scaler
X_scaler = scaler

# Apply the training-set statistics to the testing data (no re-fitting on test data)
X_test_scaled = X_scaler.transform(X_test)

## 6. Instantiate a K Nearest Neighbour Classifier instance

In [150]:
# KNeighborsClassifier is already imported in the notebook's first (imports) cell,
# so it is not re-imported here; keeping all imports in one place makes the
# notebook's dependencies visible at a glance.

# Instantiate the KNeighborsClassifier model with n_neighbors = 3
knn = KNeighborsClassifier(n_neighbors=3)

## 7. Fit the model using the training data

In [151]:
# Fit the classifier on the scaled training features and their labels
knn.fit(X=X_train_scaled, y=y_train)

KNeighborsClassifier(n_neighbors=3)

## 8. Make Predictions using the testing data

In [152]:
# Predict a label for every row of the scaled testing features
y_pred = knn.predict(X=X_test_scaled)

## 9. Generate the Classification Report for the test data

In [153]:
# Compare the true test labels with the model's predictions and print the per-class metrics
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.52      0.93      0.67      4037
           1       0.79      0.25      0.38      4538

    accuracy                           0.57      8575
   macro avg       0.66      0.59      0.52      8575
weighted avg       0.67      0.57      0.52      8575



In [None]:
# The quality of a model's predictions is summarised by the classification report; three key metrics:

# Accuracy (how often the model is correct: the ratio of correctly predicted observations to the total number of observations):
# on the test set (8,575 organisations) the model was correct 57% of the time. That is not very accurate, so one
# recommendation would be to add more samples (data points).


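# Precision (ratio of correctly predicted positive observations to all predicted positive observations, TP / (TP + FP)):
# of the test-set organisations that the model predicted would use the money effectively (class 1), 79% actually did.
# For class 0 (did not use the money effectively) precision is only 52%. Note that this describes the model's
# predictions on the held-out test set, not all 34,000-plus organisations funded by Alphabet Soup over the years.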

# Recall (ratio of correctly predicted positive observations to all actual positive observations, TP / (TP + FN)):
# the classification report's best metric is recall for the first group (class 0, the ones that didn't use the money
# effectively): of all organisations that actually belong to class 0, the model identified 93%.

# Out of all the organisations that actually used the money effectively, the model only predicted this outcome correctly..
# ..for 25% of those organisations.
# 




# 1. TN / True Negative: when a case was negative and predicted negative
# 2. TP / True Positive: when a case was positive and predicted positive
# 3. FN / False Negative: when a case was positive but predicted negative
# 4. FP / False Positive: when a case was negative but predicted positive