# Import Libraries

In [1]:
# import Libraries
import numpy as np
import pandas as pd
import pickle

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from prettytable import PrettyTable
from astropy.table import Table, Column

# Load Sample Data

In [2]:
''' 
*---------------------- LOAD_SAMPLE_DATA ------------------------*
|     Function: read_csv()                                       |
|             Purpose: Read a dataset in CSV file format         |
|     Arguments:                                                 |
|             path: Path to dataset file                         |
|             dataset: Dataset file name                         |
|     Return:                                                    |
|             dataset: Dataset in DataFrame format               |
*----------------------------------------------------------------*
'''
 
sample_data = pd.read_csv("C:/Users/ahmad/OneDrive/Desktop/ML/SemesterProject/sample_data.csv")

print("\n\nSample Data:")
print("============\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
# Print the first 10 lines
print(sample_data.head(10))

# Print the last 10 lines
print(sample_data.tail(10))



Sample Data:

    Loan_ID Gender Married  Dependents     Education Self_Employed  \
0  LP001002   Male      No           0      Graduate            No   
1  LP001003   Male     Yes           1      Graduate            No   
2  LP001005   Male     Yes           0      Graduate           Yes   
3  LP001006   Male     Yes           0  Not Graduate            No   
4  LP001008   Male      No           0      Graduate            No   
5  LP001011   Male     Yes           2      Graduate           Yes   
6  LP001013   Male     Yes           0  Not Graduate            No   
7  LP001014   Male     Yes           3      Graduate            No   
8  LP001018   Male     Yes           2      Graduate            No   
9  LP001020   Male     Yes           1      Graduate            No   

   ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  \
0             5849                0.0         140               360   
1             4583             1508.0         128               360   


# # Understand Sample Data

In [3]:
print("\n\nAttributes in Sample Data:")
print("==========================\n")

print(sample_data.columns)

print("\n\nNumber of Instances in Sample Data:",sample_data["Gender"].count())
print("========================================\n")

# Extract the features
features = sample_data[["Gender", "Married", "Dependents", "Education", "Self_Employed", "ApplicantIncome", "CoapplicantIncome", "LoanAmount", "Loan_Amount_Term",
                        "Credit_History", "Property_Area", "Loan_Status"]]

# Save the features to a new CSV file
features.to_csv("features.csv", index=False)
print(features.head(10))



Attributes in Sample Data:

Index(['Loan_ID', 'Gender', 'Married', 'Dependents', 'Education',
       'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status'],
      dtype='object')


Number of Instances in Sample Data: 614

  Gender Married  Dependents     Education Self_Employed  ApplicantIncome  \
0   Male      No           0      Graduate            No             5849   
1   Male     Yes           1      Graduate            No             4583   
2   Male     Yes           0      Graduate           Yes             3000   
3   Male     Yes           0  Not Graduate            No             2583   
4   Male      No           0      Graduate            No             6000   
5   Male     Yes           2      Graduate           Yes             5417   
6   Male     Yes           0  Not Graduate            No             2333   
7   Male     Yes           3      Graduate            No           

# Train the Label Encoder

In [4]:
''' 
*------------------ TRAIN_LABEL_ENCODER --------------------*
|        Function: Fit()                                    |
|              Purpose: Fit or Train the Label Encoder      |
|        Arguments:                                         |
|               Labels: Target Values                       |
|        Return:                                            |
|               Instance: Returns an instance of self       |
*-----------------------------------------------------------*
'''

# Labels

gender = pd.DataFrame({"Gender": ["Male", "Female"]})
married = pd.DataFrame({"Married": ["Yes", "No"]})
education = pd.DataFrame({"Education": ["Graduate", "Not Graduate"]})
self_employed = pd.DataFrame({"Self_Employed": ["Yes", "No"]})
property_area = pd.DataFrame(
    {"Property_Area": ["Rural", "Urban", "Semiurban"]})
loan_status = pd.DataFrame({"Loan_Status": ["Y", "N"]})

# Initialize the Label Encoders

gender_label_encoder = LabelEncoder()
married_label_encoder = LabelEncoder()
education_label_encoder = LabelEncoder()
self_employed_label_encoder = LabelEncoder()
property_area_label_encoder = LabelEncoder()
loan_status_label_encoder = LabelEncoder()

# Train the Label Encoders

gender_label_encoder.fit(np.ravel(gender))
married_label_encoder.fit(np.ravel(married))
education_label_encoder.fit(np.ravel(education))
self_employed_label_encoder.fit(np.ravel(self_employed))
property_area_label_encoder.fit(np.ravel(property_area))
loan_status_label_encoder.fit(np.ravel(loan_status))


LabelEncoder()

# Label Encoding of the Output

In [5]:
''' 
*------------------ LABEL_ENCODE_OUTPUT --------------------*
|        Function: Transform()                              |
|              Purpose: Transform Input (Categorical)       |
|                       into Numerical Representation       |
|        Arguments:                                         |
|              Attribute: Target values                     |
|        Return:                                            |
|              Attribute: Numerical Representation          |
*-----------------------------------------------------------*
'''

sample_data_encoded_output = features.copy()
original_sample_data = features.copy()

# Transform Output of into Numerical Representation

print("\n\nLoan Status Attribute After Label Encoding:")
print("========================================\n")
features["encoded_loan_status"] = loan_status_label_encoder.transform(
    features['Loan_Status'])
print(features[["Loan_Status", "encoded_loan_status"]])

# Print Original and Encoded Ouput Sample Data

sample_data_encoded_output[['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term',
                            'Credit_History', 'Property_Area', 'Loan_Status']] = features[['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term',
                                                                                           'Credit_History', 'Property_Area', 'encoded_loan_status']]
pd.set_option("display.max_rows", None, "display.max_columns", None)
print("\n\nOriginal Sample Data:")
print("=====================\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(original_sample_data.head(10))
print("\n\nSample Data after Label Encoding of Output:")
print("===========================================\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded_output.head(10))

# Save the Transformed Features into CSV File

sample_data_encoded_output.to_csv(
    r'sample-data-encoded-output.csv', index=False, header=True)




Loan Status Attribute After Label Encoding:

    Loan_Status  encoded_loan_status
0             Y                    1
1             N                    0
2             Y                    1
3             Y                    1
4             Y                    1
5             Y                    1
6             Y                    1
7             N                    0
8             Y                    1
9             N                    0
10            Y                    1
11            Y                    1
12            Y                    1
13            N                    0
14            Y                    1
15            Y                    1
16            Y                    1
17            N                    0
18            N                    0
19            Y                    1
20            N                    0
21            Y                    1
22            N                    0
23            N                    0
24            N             

# Label Encoding of the Input

In [6]:
''' 
*------------------- LABEL_ENCODE_INPUT --------------------*
|        Function: Transform()                              |
|              Purpose: Transform Input (Categorical)       |
|                          into Numerical Representation    |
|        Arguments:                                         |
|              Attribute: Target values                     |
|        Return:                                            |
|              Attribute: Numerical Representation          |
*-----------------------------------------------------------*
'''

sample_data_encoded = sample_data_encoded_output.copy()
sample_data_encoded_output_orignal = sample_data_encoded_output.copy()

# Transform Input Attributes into Numerical Representation

print("\n\nGender Attribute After Label Encoding:")
print("======================================\n")
sample_data_encoded_output["encoded_gender"] = gender_label_encoder.transform(
    sample_data_encoded_output['Gender'])
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded_output.head(10)[["Gender", "encoded_gender"]])

print("\n\nMarried Attribute After Label Encoding:")
print("=======================================\n")
sample_data_encoded_output["encoded_married"] = married_label_encoder.transform(
    sample_data_encoded_output['Married'])
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded_output.head(10)[["Married", "encoded_married"]])

print("\n\nEducation Attribute After Label Encoding:")
print("========================================\n")
sample_data_encoded_output["encoded_education"] = education_label_encoder.transform(
    sample_data_encoded_output['Education'])
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded_output.head(10)[["Education", "encoded_education"]])

print("\n\nSelf_Employed Attribute After Label Encoding:")
print("========================================\n")
sample_data_encoded_output["encoded_self_employed"] = self_employed_label_encoder.transform(
    sample_data_encoded_output['Self_Employed'])
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded_output.head(10)[
      ["Self_Employed", "encoded_self_employed"]])

print("\n\nProperty_Area Attribute After Label Encoding:")
print("========================================\n")
sample_data_encoded_output["encoded_property_area"] = property_area_label_encoder.transform(
    sample_data_encoded_output['Property_Area'])
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded_output.head(10)[
      ["Property_Area", "encoded_property_area"]])

# Print Original and Encoded Sample Data

sample_data_encoded[['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area', 'Loan_Status']] = sample_data_encoded_output[[
    'encoded_gender', 'encoded_married', 'encoded_education', 'encoded_self_employed', 'encoded_property_area', 'Loan_Status']]
print("\n\nOriginal Sample Data:")
print("=====================\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(original_sample_data.head(10))
print("\n\nSample Data after Label Encoding:")
print("=================================\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded.head(10))

# Save the Transformed Features into CSV File

sample_data_encoded.to_csv(
    r'sample-data-encoded.csv', index=False, header=True)



Gender Attribute After Label Encoding:

  Gender  encoded_gender
0   Male               1
1   Male               1
2   Male               1
3   Male               1
4   Male               1
5   Male               1
6   Male               1
7   Male               1
8   Male               1
9   Male               1


Married Attribute After Label Encoding:

  Married  encoded_married
0      No                0
1     Yes                1
2     Yes                1
3     Yes                1
4      No                0
5     Yes                1
6     Yes                1
7     Yes                1
8     Yes                1
9     Yes                1


Education Attribute After Label Encoding:

      Education  encoded_education
0      Graduate                  0
1      Graduate                  0
2      Graduate                  0
3  Not Graduate                  1
4      Graduate                  0
5      Graduate                  0
6  Not Graduate                  1
7      Graduate   

# Training Phase

# Splitting Input Vectors and Outputs / Labels of sample Data

In [7]:
'''
*----------- SPLIT_INPUT_VECTORS_AND_OUTPUTS/LABELS -----------*
|        Function: iloc()                                      |
|            Purpose: Splitting Input Vector and Labels        |
|        Arguments:                                            |
|            Attribute: Name or Location Attribute to Split    |
|        Return:                                               |
|            Attribute: Split Attributes                       |
*--------------------------------------------------------------*
'''

print("\n\nInput Vectors (Feature Vectors) of Sample Data:")
print("===============================================\n")

input_vector_sample_data = sample_data_encoded.iloc[: , :-1]
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(input_vector_sample_data)

print("\n\nOutputs/Labels of Sample Data:")
print("==============================\n")

output_label_sample_data = sample_data_encoded.iloc[: ,-1]
pd.set_option("display.max_rows", None, "display.max_columns", None)
print("  Survived")
print(output_label_sample_data)

# Save the Input Vector and Output-Label into CSV File 

input_vector_sample_data.to_csv(r'input-vector-sample-data.csv', index = False, header = True)
output_label_sample_data.to_csv(r'output-label-sample-data.csv', index = False, header = True)



Input Vectors (Feature Vectors) of Sample Data:

     Gender  Married  Dependents  Education  Self_Employed  ApplicantIncome  \
0         1        0           0          0              0             5849   
1         1        1           1          0              0             4583   
2         1        1           0          0              1             3000   
3         1        1           0          1              0             2583   
4         1        0           0          0              0             6000   
5         1        1           2          0              1             5417   
6         1        1           0          1              0             2333   
7         1        1           3          0              0             3036   
8         1        1           2          0              0             4006   
9         1        1           1          0              0            12841   
10        1        1           2          0              0             3200   
1

# Splitting Sample Data into Training Data and Testing Data

In [8]:
    ''' 
*------------------- SPLIT_SAMPLE_DATA ---------------------*
|        Function: train_test_split()                       |
|              Purpose: Split arrays or matrices into       |
|                       random train and test subsets       |
|        Arguments:                                         |
|              arrays: sequence of indexables               |
|              test_size: float or int                      |
|        Return:                                            |
|              splitting: list                              |
*-----------------------------------------------------------*
'''

training_data_encoded, testing_data_encoded = train_test_split(
    sample_data_encoded, test_size=0.2, random_state=0, shuffle=False)

# Save the Training and Testing Data into CSV File

training_data_encoded.to_csv(
    r'training-data-encoded.csv', index=False, header=True)
testing_data_encoded.to_csv(
    r'testing-data-encoded.csv', index=False, header=True)

# print Training and Testing Data

print("\n\nTraining Data:")
print("==============\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(training_data_encoded.head(10))
print("\n\nTesting Data:")
print("==============\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(testing_data_encoded.head(10))



Training Data:

   Gender  Married  Dependents  Education  Self_Employed  ApplicantIncome  \
0       1        0           0          0              0             5849   
1       1        1           1          0              0             4583   
2       1        1           0          0              1             3000   
3       1        1           0          1              0             2583   
4       1        0           0          0              0             6000   
5       1        1           2          0              1             5417   
6       1        1           0          1              0             2333   
7       1        1           3          0              0             3036   
8       1        1           2          0              0             4006   
9       1        1           1          0              0            12841   

   CoapplicantIncome  LoanAmount  Loan_Amount_Term  Credit_History  \
0                0.0         140               360              

# Splitting Input Vectors and Outputs / Labels of Training Data

In [9]:
'''
*---------------- SPLIT_INPUT_VECTORS_AND_LABELS --------------*
|        Function: iloc()                                      |
|            Purpose: Splitting Input Vector and Labels        |
|        Arguments:                                            |
|            Attribute: Name or Location Attribute to Split    |
|        Return:                                               |
|            Attribute: Split Attributes                       |
*--------------------------------------------------------------*
'''

print("\n\nInputs Vectors (Feature Vectors) of Training Data:")
print("==================================================\n")
input_vector_train = training_data_encoded.iloc[:, :-1]
print(input_vector_train.head(10))

print("\n\nOutputs/Labels of Training Data:")
print("================================\n")
print("Loan_Status")
output_label_train = training_data_encoded.iloc[:, -1]
print(output_label_train.head(10))



Inputs Vectors (Feature Vectors) of Training Data:

   Gender  Married  Dependents  Education  Self_Employed  ApplicantIncome  \
0       1        0           0          0              0             5849   
1       1        1           1          0              0             4583   
2       1        1           0          0              1             3000   
3       1        1           0          1              0             2583   
4       1        0           0          0              0             6000   
5       1        1           2          0              1             5417   
6       1        1           0          1              0             2333   
7       1        1           3          0              0             3036   
8       1        1           2          0              0             4006   
9       1        1           1          0              0            12841   

   CoapplicantIncome  LoanAmount  Loan_Amount_Term  Credit_History  \
0                0.0        

# # Splitting Input Vectors and Outputs/Labels of Testing Data

In [10]:
'''
*---------------- SPLIT_INPUT_VECTORS_AND_LABELS --------------*
|        Function: iloc()                                      |
|            Purpose: Splitting Input Vector and Labels        |
|        Arguments:                                            |
|            Attribute: Name or Location Attribute to Split    |
|        Return:                                               |
|            Attribute: Split Attributes                       |
*--------------------------------------------------------------*
'''

print("\n\nInputs Vectors (Feature Vectors) of Testing Data:")
print("=================================================\n")
input_vector_test = testing_data_encoded.iloc[:, :-1]
print(input_vector_test)

print("\n\nOutputs/Labels of Testing Data:")
print("==============================\n")
print("Loan_Status")
output_label_test = testing_data_encoded.iloc[:, -1]
print(output_label_test)




Inputs Vectors (Feature Vectors) of Testing Data:

     Gender  Married  Dependents  Education  Self_Employed  ApplicantIncome  \
491       1        1           1          1              0             5333   
492       1        0           0          1              0             3691   
493       0        0           0          1              1            17263   
494       1        1           0          0              0             3597   
495       0        1           1          0              0             3326   
496       1        1           0          1              0             2600   
497       1        1           0          0              0             4625   
498       1        1           1          0              1             2895   
499       1        0           0          0              0             6283   
500       0        0           0          0              0              645   
501       0        0           0          0              0             3159   

# Training Machine Learning Models by Train-Test Approach

# Logistic Regression by Train-Test Approach

In [11]:
# Train the Logistic Regression

''' 
*--------------- TRAIN_Logistic_Regression ------------------*
|       Function: logistic.Logistic_Regression()                   |
|           Purpose: Train the Algorithm on Training Data          |
|       Arguments:                                                 |
|           Training Data: Provide Training Data to the Model      |
|       Return:                                                    |
|           Parameter: Model return the Training Parameters        |
*------------------------------------------------------------------*
'''
from sklearn.linear_model import LogisticRegression
print("\n\nTraining the Logistic Regression on Training Data")
print("============================================================\n")
print("\nParameters and their values:")
print("============================\n")
logistic_model = LogisticRegression()
logistic_model.fit(input_vector_train, np.ravel(output_label_train))
print(logistic_model)

# Save the Trained Model

''' 
*--------------------- SAVE_THE_TRAINED_MODEL ---------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------* 
'''

# Save the Model in a Pkl File

pickle.dump(logistic_model, open('logistic_trained_model.pkl', 'wb'))

#Testing Phase

# Load the Saved Model

''' 
*------------------- LOAD_SAVED_MODEL --------------------------*
|         Function: load()                                      |
|               Purpose: Method to Load Previously Saved Model  |
|         Arguments:                                            |
|               Model: Trained Model                            |
|         Return:                                               |
|               File: Saved Model will be Loaded in Memory      |
*---------------------------------------------------------------*
'''

# Load the Saved Model

model = pickle.load(open('logistic_trained_model.pkl', 'rb'))

# Evaluate the Machine Learning Model

''' 
*--------------------- EVALUATE_MACHINE_LEARNING_MODEL ----------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------* 
'''

# Provide Test data to the Trained Model

model_predictions = model.predict(input_vector_test)
testing_data_encoded.copy(deep=True)
pd.options.mode.chained_assignment = None
testing_data_encoded["Predictions"] = model_predictions

# Save the Predictions into CSV File

testing_data_encoded.to_csv(r'model-predictions.csv', index=False, header=True)

model_predictions = testing_data_encoded
# print("\n\nPredictions Returned by logistic_trained_model:")
# print("==========================================\n")
# print(model_predictions.head(10))

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)



Training the Logistic Regression on Training Data


Parameters and their values:

LogisticRegression()


Accuracy Score:

0.85


F1 Score:

0.9


Precision:

0.84


Recall:

0.96


Confusion Matrix:

[[23 16]
 [ 3 81]]


Classification Report:

              precision    recall  f1-score   support

           0       0.88      0.59      0.71        39
           1       0.84      0.96      0.90        84

    accuracy                           0.85       123
   macro avg       0.86      0.78      0.80       123
weighted avg       0.85      0.85      0.84       123



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# Random Forest Classifier by Train-Test Approach

In [12]:
# Train the Random Forest

''' 
*--------------- TRAIN_Random_Forest ------------------*
|       Function: rfc.RandomForestClassifier()                     |
|           Purpose: Train the Algorithm on Training Data          |
|       Arguments:                                                 |
|           Training Data: Provide Training Data to the Model      |
|       Return:                                                    |
|           Parameter: Model return the Training Parameters        |
*------------------------------------------------------------------*
'''
from sklearn.ensemble import RandomForestClassifier
#Train Random Forest:
print("\n\nTraining the Random Forest Classifier on Training Data")
print("========================================================\n")
print("\nParameters and their values:")
print("============================\n")
rfc_model = RandomForestClassifier(n_estimators=100, random_state=0)
rfc_model.fit(input_vector_train, np.ravel(output_label_train))
print(rfc_model)

# Save the Trained Model

''' 
*--------------------- SAVE_THE_TRAINED_MODEL ---------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------* 
'''

# Save the Model in a Pkl File

pickle.dump(rfc_model, open('rfc_trained_model.pkl', 'wb'))


# Load the Saved Model

''' 
*------------------- LOAD_SAVED_MODEL --------------------------*
|         Function: load()                                      |
|               Purpose: Method to Load Previously Saved Model  |
|         Arguments:                                            |
|               Model: Trained Model                            |
|         Return:                                               |
|               File: Saved Model will be Loaded in Memory      |
*---------------------------------------------------------------*
'''

# Load the Saved Model

model = pickle.load(open('rfc_trained_model.pkl', 'rb'))

# Evaluate the Machine Learning Model

''' 
*--------------------- EVALUATE_MACHINE_LEARNING_MODEL ----------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------* 
'''

# Provide Test data to the Trained Model

model_predictions = model.predict(input_vector_test)
testing_data_encoded.copy(deep=True)
pd.options.mode.chained_assignment = None
testing_data_encoded["Predictions"] = model_predictions

# Save the Predictions into CSV File

testing_data_encoded.to_csv(r'model-predictions.csv', index=False, header=True)

model_predictions = testing_data_encoded
# print("\n\nPredictions Returned by rfc_trained_model:")
# print("==========================================\n")
# print(model_predictions)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)




Training the Random Forest Classifier on Training Data


Parameters and their values:

RandomForestClassifier(random_state=0)


Accuracy Score:

0.84


F1 Score:

0.89


Precision:

0.83


Recall:

0.95


Confusion Matrix:

[[23 16]
 [ 4 80]]


Classification Report:

              precision    recall  f1-score   support

           0       0.85      0.59      0.70        39
           1       0.83      0.95      0.89        84

    accuracy                           0.84       123
   macro avg       0.84      0.77      0.79       123
weighted avg       0.84      0.84      0.83       123



# Naïve Bayes by Train-Test Approach

In [13]:
# Train the Naive Bayes

''' 
*--------------- TRAIN_Naive_Bayes ------------------*
|       Function: linear.NaiveBayes()                                        |
|           Purpose: Train the Algorithm on Training Data          |
|       Arguments:                                                 |
|           Training Data: Provide Training Data to the Model      |
|       Return:                                                    |
|           Parameter: Model return the Training Parameters        |
*------------------------------------------------------------------*
'''
from sklearn.naive_bayes import GaussianNB
print("\n\nTraining the Naive Bayes classifier on Training Data")
print("========================================================\n")
print("\nParameters and their values:")
print("============================\n")

naive_bayes_model = GaussianNB()
naive_bayes_model.fit(input_vector_train, output_label_train)
print(naive_bayes_model)

# Save the Trained Model

''' 
*--------------------- SAVE_THE_TRAINED_MODEL ---------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------* 
'''

# Save the Model in a Pkl File

pickle.dump(naive_bayes_model, open('naive_bayes_trained_model.pkl', 'wb'))


# Load the Saved Model

''' 
*------------------- LOAD_SAVED_MODEL --------------------------*
|         Function: load()                                      |
|               Purpose: Method to Load Previously Saved Model  |
|         Arguments:                                            |
|               Model: Trained Model                            |
|         Return:                                               |
|               File: Saved Model will be Loaded in Memory      |
*---------------------------------------------------------------*
'''

# Load the Saved Model

model = pickle.load(open('naive_bayes_trained_model.pkl', 'rb'))

# Evaluate the Machine Learning Model

''' 
*--------------------- EVALUATE_MACHINE_LEARNING_MODEL ----------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------* 
'''

# Provide Test data to the Trained Model

model_predictions = model.predict(input_vector_test)
testing_data_encoded.copy(deep=True)
pd.options.mode.chained_assignment = None
testing_data_encoded["Predictions"] = model_predictions

# Save the Predictions into CSV File

testing_data_encoded.to_csv(r'model-predictions.csv', index=False, header=True)

model_predictions = testing_data_encoded
# print("\n\nPredictions Returned by naive_bayes_trained_model:")
# print("==========================================\n")
# print(model_predictions)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)





Training the Naive Bayes classifier on Training Data


Parameters and their values:

GaussianNB()


Accuracy Score:

0.85


F1 Score:

0.89


Precision:

0.85


Recall:

0.94


Confusion Matrix:

[[25 14]
 [ 5 79]]


Classification Report:

              precision    recall  f1-score   support

           0       0.83      0.64      0.72        39
           1       0.85      0.94      0.89        84

    accuracy                           0.85       123
   macro avg       0.84      0.79      0.81       123
weighted avg       0.84      0.85      0.84       123



# Decision Tree Algorithm by Train-Test

In [14]:
# Train the Decision Tree

''' 
*--------------- TRAIN_Decision_Tree ------------------*
|       Function: dtc.DecisionTreeClassifier()                     |
|           Purpose: Train the Algorithm on Training Data          |
|       Arguments:                                                 |
|           Training Data: Provide Training Data to the Model      |
|       Return:                                                    |
|           Parameter: Model return the Training Parameters        |
*------------------------------------------------------------------*
'''
from sklearn.tree import DecisionTreeClassifier
#Applying Decision Tree Algorithm:
print("\n\nTraining the Decision Tree Classifier on Training Data")
print("========================================================\n")
print("\nParameters and their values:")
print("============================\n")
dtc_model = DecisionTreeClassifier(random_state=0)
dtc_model.fit(input_vector_train,np.ravel(output_label_train))
print(dtc_model)

# Save the Trained Model

''' 
*--------------------- SAVE_THE_TRAINED_MODEL ---------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------* 
'''

# Save the Model in a Pkl File

pickle.dump(dtc_model, open('dtc_trained_model.pkl', 'wb'))


# Load the Saved Model

''' 
*------------------- LOAD_SAVED_MODEL --------------------------*
|         Function: load()                                      |
|               Purpose: Method to Load Previously Saved Model  |
|         Arguments:                                            |
|               Model: Trained Model                            |
|         Return:                                               |
|               File: Saved Model will be Loaded in Memory      |
*---------------------------------------------------------------*
'''

# Load the Saved Model

model = pickle.load(open('dtc_trained_model.pkl', 'rb'))

# Evaluate the Machine Learning Model

''' 
*--------------------- EVALUATE_MACHINE_LEARNING_MODEL ----------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------* 
'''

# Provide Test data to the Trained Model

model_predictions = model.predict(input_vector_test)
testing_data_encoded.copy(deep=True)
pd.options.mode.chained_assignment = None
testing_data_encoded["Predictions"] = model_predictions

# Save the Predictions into CSV File

testing_data_encoded.to_csv(r'model-predictions.csv', index=False, header=True)

model_predictions = testing_data_encoded
# print("\n\nPredictions Returned by dtc_trained_model:")
# print("==========================================\n")
# print(model_predictions)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)




Training the Decision Tree Classifier on Training Data


Parameters and their values:

DecisionTreeClassifier(random_state=0)


Accuracy Score:

0.74


F1 Score:

0.81


Precision:

0.82


Recall:

0.8


Confusion Matrix:

[[24 15]
 [17 67]]


Classification Report:

              precision    recall  f1-score   support

           0       0.59      0.62      0.60        39
           1       0.82      0.80      0.81        84

    accuracy                           0.74       123
   macro avg       0.70      0.71      0.70       123
weighted avg       0.74      0.74      0.74       123



# Support Vector Machine by Train-Test

In [15]:
# Train the Support Vector Machine

''' 
*--------------- TRAIN_SVM ------------------*
|       Function: svm.SVC()                                        |
|           Purpose: Train the Algorithm on Training Data          |
|       Arguments:                                                 |
|           Training Data: Provide Training Data to the Model      |
|       Return:                                                    |
|           Parameter: Model return the Training Parameters        |
*------------------------------------------------------------------*
'''
from sklearn import svm
print("\n\nTraining the Support Vector Classifier on Training Data")
print("========================================================\n")
print("\nParameters and their values:")
print("============================\n")
svc_model = svm.SVC(gamma='auto',random_state=0)
svc_model.fit(input_vector_train,np.ravel(output_label_train))
print(svc_model)

# Save the Trained Model

''' 
*--------------------- SAVE_THE_TRAINED_MODEL ---------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------* 
'''

# Save the Model in a Pkl File

pickle.dump(svc_model, open('svc_trained_model.pkl', 'wb'))


# Load the Saved Model

''' 
*------------------- LOAD_SAVED_MODEL --------------------------*
|         Function: load()                                      |
|               Purpose: Method to Load Previously Saved Model  |
|         Arguments:                                            |
|               Model: Trained Model                            |
|         Return:                                               |
|               File: Saved Model will be Loaded in Memory      |
*---------------------------------------------------------------*
'''

# Load the Saved Model

model = pickle.load(open('svc_trained_model.pkl', 'rb'))

# Evaluate the Machine Learning Model

''' 
*--------------------- EVALUATE_MACHINE_LEARNING_MODEL ----------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------* 
'''

# Provide Test data to the Trained Model

model_predictions = model.predict(input_vector_test)
testing_data_encoded.copy(deep=True)
pd.options.mode.chained_assignment = None
testing_data_encoded["Predictions"] = model_predictions

# Save the Predictions into CSV File

testing_data_encoded.to_csv(r'model-predictions.csv', index=False, header=True)

model_predictions = testing_data_encoded
# print("\n\nPredictions Returned by svc_trained_model:")
# print("==========================================\n")
# print(model_predictions)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)




Training the Support Vector Classifier on Training Data


Parameters and their values:

SVC(gamma='auto', random_state=0)


Accuracy Score:

0.68


F1 Score:

0.81


Precision:

0.68


Recall:

1.0


Confusion Matrix:

[[ 0 39]
 [ 0 84]]


Classification Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        39
           1       0.68      1.00      0.81        84

    accuracy                           0.68       123
   macro avg       0.34      0.50      0.41       123
weighted avg       0.47      0.68      0.55       123



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# XG Boost by Train-Test 

In [17]:
# Train the X Gradient Boost

''' 
*--------------- TRAIN_XGB ------------------*
|       Function: xgb.XGB()                                        |
|           Purpose: Train the Algorithm on Training Data          |
|       Arguments:                                                 |
|           Training Data: Provide Training Data to the Model      |
|       Return:                                                    |
|           Parameter: Model return the Training Parameters        |
*------------------------------------------------------------------*
'''
import xgboost as xgb
# Train the XGBoost Classifier
print("\n\nTraining the XGBoost Classifier on Training Data")
print("=================================================\n")
print("\nParameters and their values:")
print("============================\n")
xgb_model = xgb.XGBClassifier(random_state=0)
xgb_model.fit(input_vector_train, np.ravel(output_label_train))
print(xgb_model)
# Save the Trained Model

''' 
*--------------------- SAVE_THE_TRAINED_MODEL ---------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------* 
'''

# Save the Model in a Pkl File

pickle.dump(xgb_model, open('xgb_trained_model.pkl', 'wb'))


# Load the Saved Model

''' 
*------------------- LOAD_SAVED_MODEL --------------------------*
|         Function: load()                                      |
|               Purpose: Method to Load Previously Saved Model  |
|         Arguments:                                            |
|               Model: Trained Model                            |
|         Return:                                               |
|               File: Saved Model will be Loaded in Memory      |
*---------------------------------------------------------------*
'''

# Load the Saved Model

model = pickle.load(open('xgb_trained_model.pkl', 'rb'))

# Evaluate the Machine Learning Model

''' 
*--------------------- EVALUATE_MACHINE_LEARNING_MODEL ----------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------* 
'''

# Provide Test data to the Trained Model

model_predictions = model.predict(input_vector_test)
testing_data_encoded.copy(deep=True)
pd.options.mode.chained_assignment = None
testing_data_encoded["Predictions"] = model_predictions

# Save the Predictions into CSV File

testing_data_encoded.to_csv(r'model-predictions.csv', index=False, header=True)

model_predictions = testing_data_encoded
# print("\n\nPredictions Returned by xgb_trained_model:")
# print("==========================================\n")
# print(model_predictions)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)



Training the XGBoost Classifier on Training Data


Parameters and their values:

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=0, ...)


Accuracy Score:

0.79


F1 Score:

0.85


Precision:

0.82


Recall:

0.89


Confusion Matrix:

[[22 17]
 [ 9 75]]


Classification Report:

              precision  

In [16]:
pip install --user xgboost

Note: you may need to restart the kernel to use updated packages.


# K-Nearest Neighbors (KNN) algorithm by Train-Test

In [18]:
from sklearn.neighbors import KNeighborsClassifier
# Train the K-Nearest Neighbors (KNN):  

''' 
*--------------- TRAIN_KNN ------------------*                     |
|       Function: knn.KNN()                                        |
|           Purpose: Train the Algorithm on Training Data          |
|       Arguments:                                                 |
|           Training Data: Provide Training Data to the Model      |
|       Return:                                                    |
|           Parameter: Model return the Training Parameters        |
*------------------------------------------------------------------*
'''

# Train the KNN Classifier
print("\n\nTraining the K-Nearest Neighbors (KNN) Classifier on Training Data")
print("=============================================================\n")
print("\nParameters and their values:")
print("============================\n")
knn_model = KNeighborsClassifier()
knn_model.fit(input_vector_train, np.ravel(output_label_train))
print(knn_model)

# Save the Trained Model

''' 
*--------------------- SAVE_THE_TRAINED_MODEL ---------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------* 
'''

# Save the Model in a Pkl File

pickle.dump(knn_model, open('knn_trained_model.pkl', 'wb'))


# Load the Saved Model

''' 
*------------------- LOAD_SAVED_MODEL --------------------------*
|         Function: load()                                      |
|               Purpose: Method to Load Previously Saved Model  |
|         Arguments:                                            |
|               Model: Trained Model                            |
|         Return:                                               |
|               File: Saved Model will be Loaded in Memory      |
*---------------------------------------------------------------*
'''

# Load the Saved Model

model = pickle.load(open('knn_trained_model.pkl', 'rb'))

# Evaluate the Machine Learning Model

''' 
*--------------------- EVALUATE_MACHINE_LEARNING_MODEL ----------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------* 
'''

# Provide Test data to the Trained Model

model_predictions = model.predict(input_vector_test)
testing_data_encoded.copy(deep=True)
pd.options.mode.chained_assignment = None
testing_data_encoded["Predictions"] = model_predictions

# Save the Predictions into CSV File

testing_data_encoded.to_csv(r'model-predictions.csv', index=False, header=True)

model_predictions = testing_data_encoded
# print("\n\nPredictions Returned by knn_trained_model:")
# print("==========================================\n")
# print(model_predictions)


from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)



Training the K-Nearest Neighbors (KNN) Classifier on Training Data


Parameters and their values:

KNeighborsClassifier()


Accuracy Score:

0.61


F1 Score:

0.73


Precision:

0.69


Recall:

0.77


Confusion Matrix:

[[10 29]
 [19 65]]


Classification Report:

              precision    recall  f1-score   support

           0       0.34      0.26      0.29        39
           1       0.69      0.77      0.73        84

    accuracy                           0.61       123
   macro avg       0.52      0.52      0.51       123
weighted avg       0.58      0.61      0.59       123



  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


# Ada Boost Classifier by Train-Test

In [19]:

# Train the AdaBoost Classifier:  

''' 
*--------------- TRAIN_AdaBoost ------------------*
|       Function: adaboost                                         |
|           Purpose: Train the Algorithm on Training Data          |
|       Arguments:                                                 |
|           Training Data: Provide Training Data to the Model      |
|       Return:                                                    |
|           Parameter: Model return the Training Parameters        |
*------------------------------------------------------------------*
'''
from sklearn.ensemble import AdaBoostClassifier
# Train the AdaBoost Classifier
print("\n\nTraining the AdaBoost Classifier on Training Data")
print("=================================================\n")
print("\nParameters and their values:")
print("============================\n")
ada_model = AdaBoostClassifier(random_state=0)
ada_model.fit(input_vector_train, np.ravel(output_label_train))
print(ada_model)

# Save the Trained Model

''' 
*--------------------- SAVE_THE_TRAINED_MODEL ---------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------* 
'''

# Save the Model in a Pkl File

pickle.dump(ada_model, open('ada_trained_model.pkl', 'wb'))


# Load the Saved Model

''' 
*------------------- LOAD_SAVED_MODEL --------------------------*
|         Function: load()                                      |
|               Purpose: Method to Load Previously Saved Model  |
|         Arguments:                                            |
|               Model: Trained Model                            |
|         Return:                                               |
|               File: Saved Model will be Loaded in Memory      |
*---------------------------------------------------------------*
'''

# Load the Saved Model

model = pickle.load(open('ada_trained_model.pkl', 'rb'))

# Evaluate the Machine Learning Model

''' 
*--------------------- EVALUATE_MACHINE_LEARNING_MODEL ----------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------* 
'''

# Provide Test data to the Trained Model

model_predictions = model.predict(input_vector_test)
testing_data_encoded.copy(deep=True)
pd.options.mode.chained_assignment = None
testing_data_encoded["Predictions"] = model_predictions

# Save the Predictions into CSV File

testing_data_encoded.to_csv(r'model-predictions.csv', index=False, header=True)

model_predictions = testing_data_encoded
# print("\n\nPredictions Returned by ada_trained_model:")
# print("==========================================\n")
# print(model_predictions)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)




Training the AdaBoost Classifier on Training Data


Parameters and their values:

AdaBoostClassifier(random_state=0)


Accuracy Score:

0.8


F1 Score:

0.86


Precision:

0.83


Recall:

0.9


Confusion Matrix:

[[23 16]
 [ 8 76]]


Classification Report:

              precision    recall  f1-score   support

           0       0.74      0.59      0.66        39
           1       0.83      0.90      0.86        84

    accuracy                           0.80       123
   macro avg       0.78      0.75      0.76       123
weighted avg       0.80      0.80      0.80       123



# Linear Discriminant Analysis (LDA) by Train-Test

In [20]:
# Train the LinearDiscriminantAnalysis:  

''' 
*--------------- TRAIN_LDA ------------------*
|       Function: lda.LDA()                                        |
|           Purpose: Train the Algorithm on Training Data          |
|       Arguments:                                                 |
|           Training Data: Provide Training Data to the Model      |
|       Return:                                                    |
|           Parameter: Model return the Training Parameters        |
*------------------------------------------------------------------*
'''
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Train the Linear Discriminant Analysis (LDA) Classifier
print("\n\nTraining the Linear Discriminant Analysis (LDA) Classifier on Training Data")
print("===========================================================================\n")
print("\nParameters and their values:")
print("============================\n")
lda_model = LinearDiscriminantAnalysis()
lda_model.fit(input_vector_train, np.ravel(output_label_train))
print(lda_model)

# Save the Trained Model

''' 
*--------------------- SAVE_THE_TRAINED_MODEL ---------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------* 
'''

# Save the Model in a Pkl File

pickle.dump(lda_model, open('lda_trained_model.pkl', 'wb'))


# Load the Saved Model

''' 
*------------------- LOAD_SAVED_MODEL --------------------------*
|         Function: load()                                      |
|               Purpose: Method to Load Previously Saved Model  |
|         Arguments:                                            |
|               Model: Trained Model                            |
|         Return:                                               |
|               File: Saved Model will be Loaded in Memory      |
*---------------------------------------------------------------*
'''

# Load the Saved Model

model = pickle.load(open('lda_trained_model.pkl', 'rb'))

# Evaluate the Machine Learning Model

''' 
*--------------------- EVALUATE_MACHINE_LEARNING_MODEL ----------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------* 
'''

# Provide Test data to the Trained Model

model_predictions = model.predict(input_vector_test)
testing_data_encoded.copy(deep=True)
pd.options.mode.chained_assignment = None
testing_data_encoded["Predictions"] = model_predictions

# Save the Predictions into CSV File

testing_data_encoded.to_csv(r'model-predictions.csv', index=False, header=True)

model_predictions = testing_data_encoded
# print("\n\nPredictions Returned by lda_trained_model:")
# print("==========================================\n")
# print(model_predictions)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)




Training the Linear Discriminant Analysis (LDA) Classifier on Training Data


Parameters and their values:

LinearDiscriminantAnalysis()


Accuracy Score:

0.85


F1 Score:

0.9


Precision:

0.83


Recall:

0.99


Confusion Matrix:

[[22 17]
 [ 1 83]]


Classification Report:

              precision    recall  f1-score   support

           0       0.96      0.56      0.71        39
           1       0.83      0.99      0.90        84

    accuracy                           0.85       123
   macro avg       0.89      0.78      0.81       123
weighted avg       0.87      0.85      0.84       123



# Quadratic Discriminant Analysis (QDA) by Train-Test

In [21]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
# Train the LinearDiscriminantAnalysis:  

''' 
*--------------- TRAIN_QDA ------------------*
|       Function: qda.QDA()                                        |
|           Purpose: Train the Algorithm on Training Data          |
|       Arguments:                                                 |
|           Training Data: Provide Training Data to the Model      |
|       Return:                                                    |
|           Parameter: Model return the Training Parameters        |
*------------------------------------------------------------------*
'''

# Train the Quadratic Discriminant Analysis
print("\n\nTraining the Quadratic Discriminant Analysis on Training Data")
print("=============================================================\n")
print("\nParameters and their values:")
print("============================\n")
qda_model = QuadraticDiscriminantAnalysis()
qda_model.fit(input_vector_train, np.ravel(output_label_train))
print(qda_model)

# Save the Trained Model

''' 
*--------------------- SAVE_THE_TRAINED_MODEL ---------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------* 
'''

# Save the Model in a Pkl File

pickle.dump(qda_model, open('qda_trained_model.pkl', 'wb'))

# Load the Saved Model

''' 
*------------------- LOAD_SAVED_MODEL --------------------------*
|         Function: load()                                      |
|               Purpose: Method to Load Previously Saved Model  |
|         Arguments:                                            |
|               Model: Trained Model                            |
|         Return:                                               |
|               File: Saved Model will be Loaded in Memory      |
*---------------------------------------------------------------*
'''

# Load the Saved Model

model = pickle.load(open('qda_trained_model.pkl', 'rb'))

# Evaluate the Machine Learning Model

''' 
*--------------------- EVALUATE_MACHINE_LEARNING_MODEL ----------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------* 
'''

# Provide Test data to the Trained Model

model_predictions = model.predict(input_vector_test)
testing_data_encoded.copy(deep=True)
pd.options.mode.chained_assignment = None
testing_data_encoded["Predictions"] = model_predictions

# Save the Predictions into CSV File

testing_data_encoded.to_csv(r'model-predictions.csv', index=False, header=True)

model_predictions = testing_data_encoded
# print("\n\nPredictions Returned by qda_trained_model:")
# print("==========================================\n")
# print(model_predictions)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predictions["Loan_Status"], model_predictions["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)




Training the Quadratic Discriminant Analysis on Training Data


Parameters and their values:

QuadraticDiscriminantAnalysis()


Accuracy Score:

0.85


F1 Score:

0.89


Precision:

0.85


Recall:

0.94


Confusion Matrix:

[[25 14]
 [ 5 79]]


Classification Report:

              precision    recall  f1-score   support

           0       0.83      0.64      0.72        39
           1       0.85      0.94      0.89        84

    accuracy                           0.85       123
   macro avg       0.84      0.79      0.81       123
weighted avg       0.84      0.85      0.84       123



# Feedback Phase 

# Take Input from User

In [25]:
# Take Input from User

''' 
*---------------- TAKE_USER_INPUT ----------------*
'''

gender_input = input(
    "\nPlease enter your Gender here (Male, Female) : ").strip()
married_input = input(
    "\nPlease enter your Marital Status here (Yes,No) : ").strip()
dependents_input = input(
    "\nPlease enter number of Dependents here (0,1,2,3[3 or More]) : ")
education_input = input(
    "\nPlease enter Education here (Graduate,Not Graduate) : ").strip()
self_employed_input = input(
    "\nPlease enter your Employment Status here (Yes,No) : ").strip()
income_input = input("\nPlease enter your Income here : ")
coapplicant_input = input("\nPlease enter Co-Applicant income here : ")
loan_amount_input = input("\nPlease enter Loan Amount here : ")
loan_amount_term_input = input("\nPlease enter Loan Amount Term here : ")
credit_input = input(
    "\nPlease enter your Credit History here (0,1) : ").strip()
property_area_input = input(
    "\nPlease enter your Property Status here (Urban,Rural,Semiurban) : ").strip()

# Convert User Input into Feature Vector

user_input = pd.DataFrame({'Gender': [gender_input], 'Married': [married_input], 'Dependents': [dependents_input], 'Education': [education_input], 'Self_Employed': [self_employed_input], 'ApplicantIncome': [
                          income_input], 'CoapplicantIncome': [coapplicant_input], 'LoanAmount': [loan_amount_input], 'Loan_Amount_Term': [loan_amount_term_input], 'Credit_History': [credit_input], 'Property_Area': [property_area_input], })

print("\n\nUser Input Feature Vector:")
print("==========================\n")
print(user_input)

# Label Encoding

''' 
*----------------- TRANSFORM_UNSEEN_INTPUT_FEATURES ---------------*
|           Function: Transform()                                  |
|                 Purpose: Transform Input (Categorical) into      |
|                          Numerical Representation                |
|           Arguments:                                             |
|                 Attribute: Target values                         |
|           Return:                                                |
|                 Attribute: Numerical Representation              |
*------------------------------------------------------------------*   
'''

# Transform Input (Categorical) Attributes of Unseen Data into Numerical Representation

unseen_data_features = user_input.copy()
unseen_data_features["Gender"] = gender_label_encoder.transform(
    user_input['Gender'])
unseen_data_features["Married"] = married_label_encoder.transform(
    user_input['Married'])
unseen_data_features["Education"] = education_label_encoder.transform(
    user_input['Education'])
unseen_data_features["Self_Employed"] = self_employed_label_encoder.transform(
    user_input['Self_Employed'])
unseen_data_features["Property_Area"] = property_area_label_encoder.transform(
    user_input['Property_Area'])

print("\n\nUser Input Feature Vector:")
print("==========================\n")
print(user_input)

print("\n\nUser Input Encoded Feature Vector:")
print("==================================\n")
print(unseen_data_features)

# Load the Saved Model

''' 
*----------------------- LOAD_SAVED_MODEL --------------------------*
|         Function: load()                                          |
|             Purpose: Method to Load Previously Saved Model        |
|         Arguments:                                                |
|               Model: Trained Model                                |
|         Return:                                                   |
|               File: Saved Model will be Loaded in Memory          |
*-------------------------------------------------------------------*
'''

# Load the Saved Model

model = pickle.load(open('logistic_trained_model.pkl', 'rb'))


# Prediction of Unseen Instance

''' 
*----------------------------  MODEL_PREDICTION --------------------------*
|           Function: predict()                                           |
|                 Purpose: Use Trained Model to Predict the Output        |
|                          of Unseen Instances                            |
|           Arguments:                                                    |
|                 User Data: Label Encoded Feature Vector of              |
|                            Unseen Instances                             |
|           Return:                                                       |
|                 Survival: Survived or Not Survived                      |
*-------------------------------------------------------------------------*
'''

# Make a Prediction on Unseen Data

predicted_loan_status = model.predict(unseen_data_features)

if (predicted_loan_status == 1):
    prediction = "Approved"
if (predicted_loan_status == 0):
    prediction = "NOT Approved"

# Add the Prediction in a Pretty Table

pretty_table = PrettyTable()
pretty_table.add_column("       ** Prediction **       ", [prediction])
print(pretty_table)



Please enter your Gender here (Male, Female) : Male

Please enter your Marital Status here (Yes,No) : No

Please enter number of Dependents here (0,1,2,3[3 or More]) : 0

Please enter Education here (Graduate,Not Graduate) : Graduate

Please enter your Employment Status here (Yes,No) : Yes

Please enter your Income here : 2000

Please enter Co-Applicant income here : 1200

Please enter Loan Amount here : 200

Please enter Loan Amount Term here : 180

Please enter your Credit History here (0,1) : 0

Please enter your Property Status here (Urban,Rural,Semiurban) : Urban


User Input Feature Vector:

  Gender Married Dependents Education Self_Employed ApplicantIncome  \
0   Male      No          0  Graduate           Yes            2000   

  CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area  
0              1200        200              180              0         Urban  


User Input Feature Vector:

  Gender Married Dependents Education Self_Employed ApplicantIn