# Import Libraries

In [17]:
# Import Libraries

import numpy as np
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder

from prettytable import PrettyTable   
from astropy.table import Table, Column

# Load Sample Data

In [18]:
# Load Sample Data

''' 
*---------------------- LOAD_SAMPLE_DATA ------------------------*
|     Function: read_csv()                                       |
|             Purpose: Read a dataset in CSV file format         |
|     Arguments:                                                 |
|             path: Path to dataset file                         |
|             dataset: Dataset file name                         |
|     Return:                                                    |
|             dataset: Dataset in DataFrame format               |
*----------------------------------------------------------------*
'''
 
sample_data = pd.read_csv("C:/Users/ahmad/OneDrive/Desktop/ML/SemesterProject/sample_data.csv")

print("\n\nSample Data:")
print("============\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
# Print the first 10 lines
print(sample_data.head(10))

# Print the last 10 lines
print(sample_data.tail(10))



Sample Data:

    Loan_ID Gender Married  Dependents     Education Self_Employed  \
0  LP001002   Male      No           0      Graduate            No   
1  LP001003   Male     Yes           1      Graduate            No   
2  LP001005   Male     Yes           0      Graduate           Yes   
3  LP001006   Male     Yes           0  Not Graduate            No   
4  LP001008   Male      No           0      Graduate            No   
5  LP001011   Male     Yes           2      Graduate           Yes   
6  LP001013   Male     Yes           0  Not Graduate            No   
7  LP001014   Male     Yes           3      Graduate            No   
8  LP001018   Male     Yes           2      Graduate            No   
9  LP001020   Male     Yes           1      Graduate            No   

   ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  \
0             5849                0.0         140               360   
1             4583             1508.0         128               360   


# Understand Sample Data

In [19]:
# Understand Sample Data

print("\n\nAttributes in Sample Data:")
print("==========================\n")

print(sample_data.columns)

print("\n\nNumber of Instances in Sample Data:",sample_data["Gender"].count())
print("========================================\n")




Attributes in Sample Data:

Index(['Loan_ID', 'Gender', 'Married', 'Dependents', 'Education',
       'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status'],
      dtype='object')


Number of Instances in Sample Data: 614



In [20]:
# Extract the features
features = sample_data[["Gender", "Married", "Dependents", "Education", "Self_Employed", "ApplicantIncome", "CoapplicantIncome", "LoanAmount", "Loan_Amount_Term",
                        "Credit_History", "Property_Area", "Loan_Status"]]

# Save the features to a new CSV file
features.to_csv("features.csv", index=False)
print(features.head(10))

  Gender Married  Dependents     Education Self_Employed  ApplicantIncome  \
0   Male      No           0      Graduate            No             5849   
1   Male     Yes           1      Graduate            No             4583   
2   Male     Yes           0      Graduate           Yes             3000   
3   Male     Yes           0  Not Graduate            No             2583   
4   Male      No           0      Graduate            No             6000   
5   Male     Yes           2      Graduate           Yes             5417   
6   Male     Yes           0  Not Graduate            No             2333   
7   Male     Yes           3      Graduate            No             3036   
8   Male     Yes           2      Graduate            No             4006   
9   Male     Yes           1      Graduate            No            12841   

   CoapplicantIncome  LoanAmount  Loan_Amount_Term  Credit_History  \
0                0.0         140               360               1   
1           

# Train the Label Encoder

In [21]:
# Train the Label Encoder

''' 
*------------------ TRAIN_LABEL_ENCODER --------------------*
|        Function: Fit()                                    |
|              Purpose: Fit or Train the Label Encoder      |
|        Arguments:                                         |
|               Labels: Target Values                       |
|        Return:                                            |
|               Instance: Returns an instance of self       |
*-----------------------------------------------------------*
'''

# Labels

gender = pd.DataFrame({"Gender": ["Male", "Female"]})
married = pd.DataFrame({"Married": ["Yes", "No"]})
education = pd.DataFrame({"Education": ["Graduate", "Not Graduate"]})
self_employed = pd.DataFrame({"Self_Employed": ["Yes", "No"]})
property_area = pd.DataFrame(
    {"Property_Area": ["Rural", "Urban", "Semiurban"]})
loan_status = pd.DataFrame({"Loan_Status": ["Y", "N"]})

# Initialize the Label Encoders

gender_label_encoder = LabelEncoder()
married_label_encoder = LabelEncoder()
education_label_encoder = LabelEncoder()
self_employed_label_encoder = LabelEncoder()
property_area_label_encoder = LabelEncoder()
loan_status_label_encoder = LabelEncoder()

# Train the Label Encoders

gender_label_encoder.fit(np.ravel(gender))
married_label_encoder.fit(np.ravel(married))
education_label_encoder.fit(np.ravel(education))
self_employed_label_encoder.fit(np.ravel(self_employed))
property_area_label_encoder.fit(np.ravel(property_area))
loan_status_label_encoder.fit(np.ravel(loan_status))


LabelEncoder()

# Label Encoding of the Output

In [22]:
# Label Encoding of the Output

''' 
*------------------ LABEL_ENCODE_OUTPUT --------------------*
|        Function: Transform()                              |
|              Purpose: Transform Input (Categorical)       |
|                       into Numerical Representation       |
|        Arguments:                                         |
|              Attribute: Target values                     |
|        Return:                                            |
|              Attribute: Numerical Representation          |
*-----------------------------------------------------------*
'''

sample_data_encoded_output = features.copy()
original_sample_data = features.copy()

# Transform Output of into Numerical Representation

print("\n\nLoan Status Attribute After Label Encoding:")
print("========================================\n")
features["encoded_loan_status"] = loan_status_label_encoder.transform(
    features['Loan_Status'])
print(features[["Loan_Status", "encoded_loan_status"]])

# Print Original and Encoded Ouput Sample Data

sample_data_encoded_output[['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term',
                            'Credit_History', 'Property_Area', 'Loan_Status']] = features[['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term',
                                                                                           'Credit_History', 'Property_Area', 'encoded_loan_status']]
pd.set_option("display.max_rows", None, "display.max_columns", None)
print("\n\nOriginal Sample Data:")
print("=====================\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(original_sample_data.head(10))
print("\n\nSample Data after Label Encoding of Output:")
print("===========================================\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded_output.head(10))

# Save the Transformed Features into CSV File

sample_data_encoded_output.to_csv(
    r'sample-data-encoded-output.csv', index=False, header=True)




Loan Status Attribute After Label Encoding:

    Loan_Status  encoded_loan_status
0             Y                    1
1             N                    0
2             Y                    1
3             Y                    1
4             Y                    1
5             Y                    1
6             Y                    1
7             N                    0
8             Y                    1
9             N                    0
10            Y                    1
11            Y                    1
12            Y                    1
13            N                    0
14            Y                    1
15            Y                    1
16            Y                    1
17            N                    0
18            N                    0
19            Y                    1
20            N                    0
21            Y                    1
22            N                    0
23            N                    0
24            N             

# Label Encoding of the Input

In [23]:
# Label Encoding of the Input

''' 
*------------------- LABEL_ENCODE_INPUT --------------------*
|        Function: Transform()                              |
|              Purpose: Transform Input (Categorical)       |
|                          into Numerical Representation    |
|        Arguments:                                         |
|              Attribute: Target values                     |
|        Return:                                            |
|              Attribute: Numerical Representation          |
*-----------------------------------------------------------*
'''

sample_data_encoded = sample_data_encoded_output.copy()
sample_data_encoded_output_orignal = sample_data_encoded_output.copy()

# Transform Input Attributes into Numerical Representation

print("\n\nGender Attribute After Label Encoding:")
print("======================================\n")
sample_data_encoded_output["encoded_gender"] = gender_label_encoder.transform(
    sample_data_encoded_output['Gender'])
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded_output.head(10)[["Gender", "encoded_gender"]])

print("\n\nMarried Attribute After Label Encoding:")
print("=======================================\n")
sample_data_encoded_output["encoded_married"] = married_label_encoder.transform(
    sample_data_encoded_output['Married'])
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded_output.head(10)[["Married", "encoded_married"]])

print("\n\nEducation Attribute After Label Encoding:")
print("========================================\n")
sample_data_encoded_output["encoded_education"] = education_label_encoder.transform(
    sample_data_encoded_output['Education'])
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded_output.head(10)[["Education", "encoded_education"]])

print("\n\nSelf_Employed Attribute After Label Encoding:")
print("========================================\n")
sample_data_encoded_output["encoded_self_employed"] = self_employed_label_encoder.transform(
    sample_data_encoded_output['Self_Employed'])
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded_output.head(10)[
      ["Self_Employed", "encoded_self_employed"]])

print("\n\nProperty_Area Attribute After Label Encoding:")
print("========================================\n")
sample_data_encoded_output["encoded_property_area"] = property_area_label_encoder.transform(
    sample_data_encoded_output['Property_Area'])
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded_output.head(10)[
      ["Property_Area", "encoded_property_area"]])

# Print Original and Encoded Sample Data

sample_data_encoded[['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area', 'Loan_Status']] = sample_data_encoded_output[[
    'encoded_gender', 'encoded_married', 'encoded_education', 'encoded_self_employed', 'encoded_property_area', 'Loan_Status']]
print("\n\nOriginal Sample Data:")
print("=====================\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(original_sample_data.head(10))
print("\n\nSample Data after Label Encoding:")
print("=================================\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data_encoded.head(10))

# Save the Transformed Features into CSV File

sample_data_encoded.to_csv(
    r'sample-data-encoded.csv', index=False, header=True)





Gender Attribute After Label Encoding:

  Gender  encoded_gender
0   Male               1
1   Male               1
2   Male               1
3   Male               1
4   Male               1
5   Male               1
6   Male               1
7   Male               1
8   Male               1
9   Male               1


Married Attribute After Label Encoding:

  Married  encoded_married
0      No                0
1     Yes                1
2     Yes                1
3     Yes                1
4      No                0
5     Yes                1
6     Yes                1
7     Yes                1
8     Yes                1
9     Yes                1


Education Attribute After Label Encoding:

      Education  encoded_education
0      Graduate                  0
1      Graduate                  0
2      Graduate                  0
3  Not Graduate                  1
4      Graduate                  0
5      Graduate                  0
6  Not Graduate                  1
7      Graduate   

# Training Phase

# Splitting Input Vectors and Outputs / Labels of sample Data

In [24]:
# Splitting Input Vectors and Outputs / Labels of sample Data

'''
*----------- SPLIT_INPUT_VECTORS_AND_OUTPUTS/LABELS -----------*
|        Function: iloc()                                      |
|            Purpose: Splitting Input Vector and Labels        |
|        Arguments:                                            |
|            Attribute: Name or Location Attribute to Split    |
|        Return:                                               |
|            Attribute: Split Attributes                       |
*--------------------------------------------------------------*
'''

print("\n\nInput Vectors (Feature Vectors) of Sample Data:")
print("===============================================\n")

input_vector_sample_data = sample_data_encoded.iloc[: , :-1]
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(input_vector_sample_data)

print("\n\nOutputs/Labels of Sample Data:")
print("==============================\n")

output_label_sample_data = sample_data_encoded.iloc[: ,-1]
pd.set_option("display.max_rows", None, "display.max_columns", None)
print("  Survived")
print(output_label_sample_data)

# Save the Input Vector and Output-Label into CSV File 

input_vector_sample_data.to_csv(r'input-vector-sample-data.csv', index = False, header = True)
output_label_sample_data.to_csv(r'output-label-sample-data.csv', index = False, header = True)




Input Vectors (Feature Vectors) of Sample Data:

     Gender  Married  Dependents  Education  Self_Employed  ApplicantIncome  \
0         1        0           0          0              0             5849   
1         1        1           1          0              0             4583   
2         1        1           0          0              1             3000   
3         1        1           0          1              0             2583   
4         1        0           0          0              0             6000   
5         1        1           2          0              1             5417   
6         1        1           0          1              0             2333   
7         1        1           3          0              0             3036   
8         1        1           2          0              0             4006   
9         1        1           1          0              0            12841   
10        1        1           2          0              0             3200   
1

# Spliting Data using K-Fold

In [25]:
# Spliting Data using K-Fold

'''
*-------------------- SPLITING_DATA_USING_K_FOLD -----------------------*
|        Function: KFold()                                              |
|            Purpose: Split Dataset into K-Folds                        |
|        Arguments:                                                     |
|            Attribute: Number of Folds                                 |
|        Return:                                                        |
|            Attribute: No. of Splitting Iterations in the Validator    |
*-----------------------------------------------------------------------*
'''

cv = KFold(n_splits=10, random_state=0, shuffle=True)

training_data = {};
testing_data = {};
input_training_data = {};
output_training_data = {};
input_testing_data = {};
output_testing_data = {};
no_of_folds = 0;

for train_index, test_index in cv.split(input_vector_sample_data):
    
    # Training Data
    
    training_data[no_of_folds]=sample_data_encoded.iloc[train_index]
    input_training_data[no_of_folds]=input_vector_sample_data.iloc[train_index]
    output_training_data[no_of_folds]=output_label_sample_data.iloc[train_index]
    
    # Testing Data
    
    testing_data[no_of_folds]=sample_data_encoded.iloc[test_index]  
    input_testing_data[no_of_folds]=input_vector_sample_data.iloc[test_index]
    output_testing_data[no_of_folds]=output_label_sample_data.iloc[test_index]
    
    no_of_folds += 1

# Logistic Regression using K-Fold

In [26]:
# # Save the Trained Models

# ''' 
# *--------------------- SAVE_THE_TRAINED_MODELS --------------------*
# |        Function: dump()                                          |
# |             Purpose: Save the Trained Model on your Hard Disk    |
# |        Arguments:                                                |
# |             Model: Model Objects                                 |
# |        Return:                                                   |
# |             File: Trained Model will be Saved on Hard Disk       |
# *------------------------------------------------------------------* 
# '''
from sklearn.linear_model import LogisticRegression
# # Save the Models in a Pkl File
def save(logistic_model):
    pickle.dump(logistic_model, open('logistic_trained_model.pkl', 'wb'))


In [27]:
# Train the Logistic Regression
from sklearn.metrics import accuracy_score

for i in range(no_of_folds):
    # Count the number of iterations
    iteration = i + 1
    print("\nIteration:", iteration)
    
    logistic_model = LogisticRegression(max_iter=1000)
    logistic_model.fit(input_training_data[i], np.ravel(output_training_data[i]))
    save(logistic_model)
    print(logistic_model)
    
    # Make predictions on the training data
    y_pred = logistic_model.predict(input_training_data[i])
    
    # Calculate the accuracy
    accuracy = accuracy_score(output_training_data[i], y_pred)
    
    print("\nAccuracy:", accuracy)



Iteration: 1
LogisticRegression(max_iter=1000)

Accuracy: 0.8278985507246377

Iteration: 2
LogisticRegression(max_iter=1000)

Accuracy: 0.8297101449275363

Iteration: 3
LogisticRegression(max_iter=1000)

Accuracy: 0.8297101449275363

Iteration: 4
LogisticRegression(max_iter=1000)

Accuracy: 0.8260869565217391

Iteration: 5
LogisticRegression(max_iter=1000)

Accuracy: 0.8282097649186256

Iteration: 6
LogisticRegression(max_iter=1000)

Accuracy: 0.8282097649186256

Iteration: 7
LogisticRegression(max_iter=1000)

Accuracy: 0.8282097649186256

Iteration: 8
LogisticRegression(max_iter=1000)

Accuracy: 0.8372513562386981

Iteration: 9
LogisticRegression(max_iter=1000)

Accuracy: 0.8318264014466547

Iteration: 10
LogisticRegression(max_iter=1000)

Accuracy: 0.8318264014466547


In [28]:
# # Load the Saved Models

# ''' 
# *---------------------- LOAD_SAVED_MODELS ----------------------*
# |         Function: load()                                      |
# |               Purpose: Method to Load Previously Saved Model  |
# |         Arguments:                                            |
# |               Model: Trained Model                            |
# |         Return:                                               |
# |               File: Saved Model will be Loaded in Memory      |
# *---------------------------------------------------------------*
# '''

# Load the Saved Models

logistic_trained_model = {}

for i in range(no_of_folds):
    logistic_trained_model[i] = pickle.load(open('logistic_trained_model.pkl', 'rb'))


In [29]:
# # Evaluate the Performance of Trained Models

# ''' 
# *--------------- EVALUATE_PERFORMANCE_OF_TRAINED_MODELS ---------------------*
# |       Function: Predict()                                                  |
# |             Purpose: Make a Prediction using Algorithm on Test Data        |
# |       Arguments:                                                           |
# |            Testing Data: Provide Test data to the Trained Model            |
# |       Return:                                                              |
# |            Predictions: Model return Predictions                           |
# *----------------------------------------------------------------------------* 
# '''

# Provide Test data to the Trained Models

accuracy_list = []
for i in range(no_of_folds):
    print("\nIteration:", i+1)
    
    print("\n\nTesting Phase for Iteration 0" + str(i+1) + " :")
    print("================================")
    
    model_predications = logistic_trained_model[i].predict(input_testing_data[i])
    model_predications_data = input_testing_data[i].copy()
    model_predications_data["Loan_Status"] = output_testing_data[i]
    model_predications_data["Predictions"] = model_predications
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    
    # Calculate and print the accuracy
    accuracy = accuracy_score(output_testing_data[i], model_predications)
    print("Accuracy:", accuracy)
    
    accuracy_list.append(accuracy)



Iteration: 1


Testing Phase for Iteration 01 :
Accuracy: 0.8870967741935484

Iteration: 2


Testing Phase for Iteration 02 :
Accuracy: 0.8064516129032258

Iteration: 3


Testing Phase for Iteration 03 :
Accuracy: 0.8225806451612904

Iteration: 4


Testing Phase for Iteration 04 :
Accuracy: 0.8548387096774194

Iteration: 5


Testing Phase for Iteration 05 :
Accuracy: 0.8524590163934426

Iteration: 6


Testing Phase for Iteration 06 :
Accuracy: 0.8360655737704918

Iteration: 7


Testing Phase for Iteration 07 :
Accuracy: 0.8688524590163934

Iteration: 8


Testing Phase for Iteration 08 :
Accuracy: 0.7540983606557377

Iteration: 9


Testing Phase for Iteration 09 :
Accuracy: 0.8032786885245902

Iteration: 10


Testing Phase for Iteration 010 :
Accuracy: 0.8032786885245902


In [30]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)

# # Find the most discriminating confusion matrix
# most_discriminating_accuracy = max(accuracy_list)
# most_discriminating_index = accuracy_list.index(most_discriminating_accuracy)
# most_discriminating_matrix = confusion_matrix(output_testing_data[most_discriminating_index], model_predictions_data["Predictions"])

# # Compare most discriminating matrix with the previous one
# if most_discriminating_accuracy > previous_accuracy:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model performs better than the previous one.")
# else:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model does not perform better than the previous one.")






Accuracy Score:

0.8


F1 Score:

0.86


Precision:

0.76


Recall:

1.0


Confusion Matrix:

[[11 12]
 [ 0 38]]


Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.48      0.65        23
           1       0.76      1.00      0.86        38

    accuracy                           0.80        61
   macro avg       0.88      0.74      0.76        61
weighted avg       0.85      0.80      0.78        61



# Random Forest Classifier using K-Fold

In [34]:
# Save the Trained Models
'''
*--------------------- SAVE_THE_TRAINED_MODELS --------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------*
'''
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Save the Models in a Pkl File
def save(model, filename):
    pickle.dump(model, open(filename, 'wb'))

no_of_folds = 5

for i in range(no_of_folds):
    # Count the number of iterations
    iteration = i + 1
    print("\nIteration:", iteration)
    
    rf_model = RandomForestClassifier()
    rf_model.fit(input_training_data[i], np.ravel(output_training_data[i]))
    save(rf_model, f'rf_trained_model.pkl')
    print(rf_model)
    
    # Make predictions on the training data
    y_pred = rf_model.predict(input_training_data[i])
    
    # Calculate the accuracy
    accuracy = accuracy_score(output_training_data[i], y_pred)
    
    print("\nAccuracy:", accuracy)

# Load the Saved Models
rfc_trained_model = {}

for i in range(no_of_folds):
    rfc_trained_model[i] = pickle.load(open(f'rf_trained_model.pkl', 'rb'))

# Evaluate the Performance of Trained Models
'''
*--------------- EVALUATE_PERFORMANCE_OF_TRAINED_MODELS ---------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------*
'''

accuracy_list = []
for i in range(no_of_folds):
    print("\nIteration:", i+1)
    
    print("\n\nTesting Phase for Iteration 0" + str(i+1) + " :")
    print("================================")
    
    model_predications = rfc_trained_model[i].predict(input_testing_data[i])
    model_predications_data = input_testing_data[i].copy()
    model_predications_data["Loan_Status"] = output_testing_data[i]
    model_predications_data["Predictions"] = model_predications
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    
    # Calculate and print the accuracy
    accuracy = accuracy_score(output_testing_data[i], model_predications)
    print("Accuracy:", accuracy)
    
    accuracy_list.append(accuracy)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)

# # Find the most discriminating confusion matrix
# most_discriminating_accuracy = max(accuracy_list)
# most_discriminating_index = accuracy_list.index(most_discriminating_accuracy)
# most_discriminating_matrix = confusion_matrix(output_testing_data[most_discriminating_index], model_predictions_data["Predictions"])

# # Compare most discriminating matrix with the previous one
# if most_discriminating_accuracy > previous_accuracy:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model performs better than the previous one.")
# else:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model does not perform better than the previous one.")




Iteration: 1
RandomForestClassifier()

Accuracy: 1.0

Iteration: 2
RandomForestClassifier()

Accuracy: 1.0

Iteration: 3
RandomForestClassifier()

Accuracy: 1.0

Iteration: 4
RandomForestClassifier()

Accuracy: 1.0

Iteration: 5
RandomForestClassifier()

Accuracy: 1.0

Iteration: 1


Testing Phase for Iteration 01 :
Accuracy: 1.0

Iteration: 2


Testing Phase for Iteration 02 :
Accuracy: 1.0

Iteration: 3


Testing Phase for Iteration 03 :
Accuracy: 1.0

Iteration: 4


Testing Phase for Iteration 04 :
Accuracy: 1.0

Iteration: 5


Testing Phase for Iteration 05 :
Accuracy: 0.819672131147541


Accuracy Score:

0.82


F1 Score:

0.88


Precision:

0.83


Recall:

0.93


Confusion Matrix:

[[10  8]
 [ 3 40]]


Classification Report:

              precision    recall  f1-score   support

           0       0.77      0.56      0.65        18
           1       0.83      0.93      0.88        43

    accuracy                           0.82        61
   macro avg       0.80      0.74      0

# Naive Bayes using K-Fold:

In [36]:
# Save the Trained Models
'''
*--------------------- SAVE_THE_TRAINED_MODELS --------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------*
'''
import pickle
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Save the Models in a Pkl File
def save(model, filename):
    pickle.dump(model, open(filename, 'wb'))

no_of_folds = 5

for i in range(no_of_folds):
    # Count the number of iterations
    iteration = i + 1
    print("\nIteration:", iteration)
    
    nb_model = GaussianNB()
    nb_model.fit(input_training_data[i], np.ravel(output_training_data[i]))
    save(nb_model, f'nb_trained_model.pkl')
    print(nb_model)
    
    # Make predictions on the training data
    y_pred = nb_model.predict(input_training_data[i])
    
    # Calculate the accuracy
    accuracy = accuracy_score(output_training_data[i], y_pred)
    
    print("\nAccuracy:", accuracy)

# Load the Saved Models
nb_trained_model = {}

for i in range(no_of_folds):
    nb_trained_model[i] = pickle.load(open(f'nb_trained_model.pkl', 'rb'))

# Evaluate the Performance of Trained Models
'''
*--------------- EVALUATE_PERFORMANCE_OF_TRAINED_MODELS ---------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------*
'''

accuracy_list = []
for i in range(no_of_folds):
    print("\nIteration:", i+1)
    
    print("\n\nTesting Phase for Iteration 0" + str(i+1) + " :")
    print("================================")
    
    model_predictions = nb_trained_model[i].predict(input_testing_data[i])
    model_predictions_data = input_testing_data[i].copy()
    model_predictions_data["Loan_Status"] = output_testing_data[i]
    model_predictions_data["Predictions"] = model_predictions
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    
    # Calculate and print the accuracy
    accuracy = accuracy_score(output_testing_data[i], model_predictions)
    print("Accuracy:", accuracy)
    
    accuracy_list.append(accuracy)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)

# # Find the most discriminating confusion matrix
# most_discriminating_accuracy = max(accuracy_list)
# most_discriminating_index = accuracy_list.index(most_discriminating_accuracy)
# most_discriminating_matrix = confusion_matrix(output_testing_data[most_discriminating_index], model_predictions_data["Predictions"])

# # Compare most discriminating matrix with the previous one
# if most_discriminating_accuracy > previous_accuracy:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model performs better than the previous one.")
# else:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model does not perform better than the previous one.")




Iteration: 1
GaussianNB()

Accuracy: 0.8115942028985508

Iteration: 2
GaussianNB()

Accuracy: 0.8188405797101449

Iteration: 3
GaussianNB()

Accuracy: 0.8170289855072463

Iteration: 4
GaussianNB()

Accuracy: 0.8152173913043478

Iteration: 5
GaussianNB()

Accuracy: 0.8119349005424955

Iteration: 1


Testing Phase for Iteration 01 :
Accuracy: 0.8870967741935484

Iteration: 2


Testing Phase for Iteration 02 :
Accuracy: 0.8064516129032258

Iteration: 3


Testing Phase for Iteration 03 :
Accuracy: 0.8064516129032258

Iteration: 4


Testing Phase for Iteration 04 :
Accuracy: 0.8064516129032258

Iteration: 5


Testing Phase for Iteration 05 :
Accuracy: 0.8524590163934426


Accuracy Score:

0.85


F1 Score:

0.9


Precision:

0.85


Recall:

0.95


Confusion Matrix:

[[11  7]
 [ 2 41]]


Classification Report:

              precision    recall  f1-score   support

           0       0.85      0.61      0.71        18
           1       0.85      0.95      0.90        43

    accuracy       

# Decision Tree Algorithm by K-Fold Cross Validation

In [37]:
# Save the Trained Models
'''
*--------------------- SAVE_THE_TRAINED_MODELS --------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------*
'''
import pickle
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Save the Models in a Pkl File
def save(model, filename):
    pickle.dump(model, open(filename, 'wb'))

no_of_folds = 5

for i in range(no_of_folds):
    # Count the number of iterations
    iteration = i + 1
    print("\nIteration:", iteration)
    
    dt_model = DecisionTreeClassifier()
    dt_model.fit(input_training_data[i], np.ravel(output_training_data[i]))
    save(dt_model, f'dt_trained_model.pkl')
    print(dt_model)
    
    # Make predictions on the training data
    y_pred = dt_model.predict(input_training_data[i])
    
    # Calculate the accuracy
    accuracy = accuracy_score(output_training_data[i], y_pred)
    
    print("\nAccuracy:", accuracy)

# Load the Saved Models
decision_tree_trained_model = {}

for i in range(no_of_folds):
    decision_tree_trained_model[i] = pickle.load(open(f'dt_trained_model.pkl', 'rb'))

# Evaluate the Performance of Trained Models
'''
*--------------- EVALUATE_PERFORMANCE_OF_TRAINED_MODELS ---------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------*
'''

accuracy_list = []
for i in range(no_of_folds):
    print("\nIteration:", i+1)
    
    print("\n\nTesting Phase for Iteration 0" + str(i+1) + " :")
    print("================================")
    
    model_predications = decision_tree_trained_model[i].predict(input_testing_data[i])
    model_predications_data = input_testing_data[i].copy()
    model_predications_data["Loan_Status"] = output_testing_data[i]
    model_predications_data["Predictions"] = model_predications
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    
    # Calculate and print the accuracy
    accuracy = accuracy_score(output_testing_data[i], model_predications)
    print("Accuracy:", accuracy)
    
    accuracy_list.append(accuracy)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)

# # Find the most discriminating confusion matrix
# most_discriminating_accuracy = max(accuracy_list)
# most_discriminating_index = accuracy_list.index(most_discriminating_accuracy)
# most_discriminating_matrix = confusion_matrix(output_testing_data[most_discriminating_index], model_predictions_data["Predictions"])

# # Compare most discriminating matrix with the previous one
# if most_discriminating_accuracy > previous_accuracy:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model performs better than the previous one.")
# else:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model does not perform better than the previous one.")




Iteration: 1
DecisionTreeClassifier()

Accuracy: 1.0

Iteration: 2
DecisionTreeClassifier()

Accuracy: 1.0

Iteration: 3
DecisionTreeClassifier()

Accuracy: 1.0

Iteration: 4
DecisionTreeClassifier()

Accuracy: 1.0

Iteration: 5
DecisionTreeClassifier()

Accuracy: 1.0

Iteration: 1


Testing Phase for Iteration 01 :
Accuracy: 1.0

Iteration: 2


Testing Phase for Iteration 02 :
Accuracy: 1.0

Iteration: 3


Testing Phase for Iteration 03 :
Accuracy: 1.0

Iteration: 4


Testing Phase for Iteration 04 :
Accuracy: 1.0

Iteration: 5


Testing Phase for Iteration 05 :
Accuracy: 0.8032786885245902


Accuracy Score:

0.8


F1 Score:

0.85


Precision:

0.9


Recall:

0.81


Confusion Matrix:

[[14  4]
 [ 8 35]]


Classification Report:

              precision    recall  f1-score   support

           0       0.64      0.78      0.70        18
           1       0.90      0.81      0.85        43

    accuracy                           0.80        61
   macro avg       0.77      0.80      0.

# Support Vector Machine by K-Fold 

In [38]:
# Save the Trained Models
'''
*--------------------- SAVE_THE_TRAINED_MODELS --------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------*
'''
import pickle
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Save the Models in a Pkl File
def save(model, filename):
    pickle.dump(model, open(filename, 'wb'))

no_of_folds = 5

for i in range(no_of_folds):
    # Count the number of iterations
    iteration = i + 1
    print("\nIteration:", iteration)
    
    svm_model = SVC()
    svm_model.fit(input_training_data[i], np.ravel(output_training_data[i]))
    save(svm_model, f'svm_trained_model.pkl')
    print(svm_model)
    
    # Make predictions on the training data
    y_pred = svm_model.predict(input_training_data[i])
    
    # Calculate the accuracy
    accuracy = accuracy_score(output_training_data[i], y_pred)
    
    print("\nAccuracy:", accuracy)

# Load the Saved Models
svm_trained_model = {}

for i in range(no_of_folds):
    svm_trained_model[i] = pickle.load(open(f'svm_trained_model.pkl', 'rb'))

# Evaluate the Performance of Trained Models
'''
*--------------- EVALUATE_PERFORMANCE_OF_TRAINED_MODELS ---------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------*
'''

accuracy_list = []
for i in range(no_of_folds):
    print("\nIteration:", i+1)
    
    print("\n\nTesting Phase for Iteration 0" + str(i+1) + " :")
    print("================================")
    
    model_predictions = svm_trained_model[i].predict(input_testing_data[i])
    model_predictions_data = input_testing_data[i].copy()
    model_predictions_data["Loan_Status"] = output_testing_data[i]
    model_predictions_data["Predictions"] = model_predictions
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    
    # Calculate and print the accuracy
    accuracy = accuracy_score(output_testing_data[i], model_predictions)
    print("Accuracy:", accuracy)
    
    accuracy_list.append(accuracy)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)

# # Find the most discriminating confusion matrix
# most_discriminating_accuracy = max(accuracy_list)
# most_discriminating_index = accuracy_list.index(most_discriminating_accuracy)
# most_discriminating_matrix = confusion_matrix(output_testing_data[most_discriminating_index], model_predictions_data["Predictions"])

# # Compare most discriminating matrix with the previous one
# if most_discriminating_accuracy > previous_accuracy:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model performs better than the previous one.")
# else:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model does not perform better than the previous one.")




Iteration: 1
SVC()

Accuracy: 0.6847826086956522

Iteration: 2
SVC()

Accuracy: 0.6884057971014492

Iteration: 3
SVC()

Accuracy: 0.6920289855072463

Iteration: 4
SVC()

Accuracy: 0.7010869565217391

Iteration: 5
SVC()

Accuracy: 0.6907775768535263

Iteration: 1


Testing Phase for Iteration 01 :
Accuracy: 0.7580645161290323

Iteration: 2


Testing Phase for Iteration 02 :
Accuracy: 0.7258064516129032

Iteration: 3


Testing Phase for Iteration 03 :
Accuracy: 0.6935483870967742

Iteration: 4


Testing Phase for Iteration 04 :
Accuracy: 0.6451612903225806

Iteration: 5


Testing Phase for Iteration 05 :
Accuracy: 0.7049180327868853


Accuracy Score:

0.7


F1 Score:

0.83


Precision:

0.7


Recall:

1.0


Confusion Matrix:

[[ 0 18]
 [ 0 43]]


Classification Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        18
           1       0.70      1.00      0.83        43

    accuracy                           0.70        61
  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# XG Boost by K-Fold

In [39]:
# Save the Trained Models
'''
*--------------------- SAVE_THE_TRAINED_MODELS --------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------*
'''
import pickle
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Save the Models in a Pkl File
def save(model, filename):
    pickle.dump(model, open(filename, 'wb'))

no_of_folds = 5

for i in range(no_of_folds):
    # Count the number of iterations
    iteration = i + 1
    print("\nIteration:", iteration)
    
    xgb_model = XGBClassifier()
    xgb_model.fit(input_training_data[i], np.ravel(output_training_data[i]))
    save(xgb_model, f'xgb_trained_model.pkl')
    print(xgb_model)
    
    # Make predictions on the training data
    y_pred = xgb_model.predict(input_training_data[i])
    
    # Calculate the accuracy
    accuracy = accuracy_score(output_training_data[i], y_pred)
    
    print("\nAccuracy:", accuracy)

# Load the Saved Models
xgb_trained_model = {}

for i in range(no_of_folds):
    xgb_trained_model[i] = pickle.load(open(f'xgb_trained_model.pkl', 'rb'))

# Evaluate the Performance of Trained Models
'''
*--------------- EVALUATE_PERFORMANCE_OF_TRAINED_MODELS ---------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------*
'''

accuracy_list = []
for i in range(no_of_folds):
    print("\nIteration:", i+1)
    
    print("\n\nTesting Phase for Iteration 0" + str(i+1) + " :")
    print("================================")
    
    model_predications = xgb_trained_model[i].predict(input_testing_data[i])
    model_predications_data = input_testing_data[i].copy()
    model_predications_data["Loan_Status"] = output_testing_data[i]
    model_predications_data["Predictions"] = model_predications
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    
    # Calculate and print the accuracy
    accuracy = accuracy_score(output_testing_data[i], model_predications)
    print("Accuracy:", accuracy)
    
    accuracy_list.append(accuracy)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)

# # Find the most discriminating confusion matrix
# most_discriminating_accuracy = max(accuracy_list)
# most_discriminating_index = accuracy_list.index(most_discriminating_accuracy)
# most_discriminating_matrix = confusion_matrix(output_testing_data[most_discriminating_index], model_predictions_data["Predictions"])

# # Compare most discriminating matrix with the previous one
# if most_discriminating_accuracy > previous_accuracy:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model performs better than the previous one.")
# else:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model does not perform better than the previous one.")




Iteration: 1
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, ...)

Accuracy: 1.0

Iteration: 2
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enab

# K-Nearest Neighbors (KNN) Algorithm by K-Fold

In [40]:
# Save the Trained Models
'''
*--------------------- SAVE_THE_TRAINED_MODELS --------------------*
|        Function: dump()                                          |
|             Purpose: Save the Trained Model on your Hard Disk    |
|        Arguments:                                                |
|             Model: Model Objects                                 |
|        Return:                                                   |
|             File: Trained Model will be Saved on Hard Disk       |
*------------------------------------------------------------------*
'''
import pickle
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Save the Models in a Pkl File
def save(model, filename):
    pickle.dump(model, open(filename, 'wb'))

no_of_folds = 5

for i in range(no_of_folds):
    # Count the number of iterations
    iteration = i + 1
    print("\nIteration:", iteration)
    
    knn_model = KNeighborsClassifier()
    knn_model.fit(input_training_data[i], np.ravel(output_training_data[i]))
    save(knn_model, f'knn_trained_model.pkl')
    print(knn_model)
    
    # Make predictions on the training data
    y_pred = knn_model.predict(input_training_data[i])
    
    # Calculate the accuracy
    accuracy = accuracy_score(output_training_data[i], y_pred)
    
    print("\nAccuracy:", accuracy)

# Load the Saved Models
knn_trained_model = {}

for i in range(no_of_folds):
    knn_trained_model[i] = pickle.load(open(f'knn_trained_model.pkl', 'rb'))

# Evaluate the Performance of Trained Models
'''
*--------------- EVALUATE_PERFORMANCE_OF_TRAINED_MODELS ---------------------*
|       Function: Predict()                                                  |
|             Purpose: Make a Prediction using Algorithm on Test Data        |
|       Arguments:                                                           |
|            Testing Data: Provide Test data to the Trained Model            |
|       Return:                                                              |
|            Predictions: Model return Predictions                           |
*----------------------------------------------------------------------------*
'''

accuracy_list = []
for i in range(no_of_folds):
    print("\nIteration:", i+1)
    
    print("\n\nTesting Phase for Iteration 0" + str(i+1) + " :")
    print("================================")
    
    model_predications = knn_trained_model[i].predict(input_testing_data[i])
    model_predications_data = input_testing_data[i].copy()
    model_predications_data["Loan_Status"] = output_testing_data[i]
    model_predications_data["Predictions"] = model_predications
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    
    # Calculate and print the accuracy
    accuracy = accuracy_score(output_testing_data[i], model_predications)
    print("Accuracy:", accuracy)
    
    accuracy_list.append(accuracy)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(
    model_predications_data["Loan_Status"], model_predications_data["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)

# # Find the most discriminating confusion matrix
# most_discriminating_accuracy = max(accuracy_list)
# most_discriminating_index = accuracy_list.index(most_discriminating_accuracy)
# most_discriminating_matrix = confusion_matrix(output_testing_data[most_discriminating_index], model_predictions_data["Predictions"])

# # Compare most discriminating matrix with the previous one
# if most_discriminating_accuracy > previous_accuracy:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model performs better than the previous one.")
# else:
#     print("\nThe most discriminating confusion matrix is:")
#     print(most_discriminating_matrix)
#     print("\nThe current model does not perform better than the previous one.")



Iteration: 1
KNeighborsClassifier()

Accuracy: 0.7336956521739131

Iteration: 2
KNeighborsClassifier()

Accuracy: 0.7409420289855072

Iteration: 3
KNeighborsClassifier()

Accuracy: 0.7427536231884058

Iteration: 4
KNeighborsClassifier()

Accuracy: 0.7355072463768116

Iteration: 5
KNeighborsClassifier()

Accuracy: 0.7179023508137432

Iteration: 1


Testing Phase for Iteration 01 :
Accuracy: 0.7258064516129032

Iteration: 2


Testing Phase for Iteration 02 :
Accuracy: 0.7580645161290323

Iteration: 3


Testing Phase for Iteration 03 :
Accuracy: 0.6774193548387096

Iteration: 4


Testing Phase for Iteration 04 :
Accuracy: 0.7580645161290323

Iteration: 5


Testing Phase for Iteration 05 :
Accuracy: 0.6557377049180327


Accuracy Score:

0.66


F1 Score:

0.78


Precision:

0.71


Recall:

0.86


Confusion Matrix:

[[ 3 15]
 [ 6 37]]


Classification Report:

              precision    recall  f1-score   support

           0       0.33      0.17      0.22        18
           1       0.71

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


# Ada Boost Classifier by K-Fold

In [41]:
# Import the required libraries
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Train the AdaBoost Classifier
ada_boost_models = []

for i in range(no_of_folds):
    # Count the number of iterations
    iteration = i + 1
    print("\nIteration:", iteration)
    
    ada_boost_model = AdaBoostClassifier()
    ada_boost_model.fit(input_training_data[i], np.ravel(output_training_data[i]))
    ada_boost_models.append(ada_boost_model)
    
    # Make predictions on the training data
    y_pred = ada_boost_model.predict(input_training_data[i])
    
    # Calculate the accuracy
    accuracy = accuracy_score(output_training_data[i], y_pred)
    print("\nAccuracy:", accuracy)

# Save the Trained Models
def save(model, iteration):
    pickle.dump(model, open('ada_boost_trained_model.pkl', 'wb'))

for i, model in enumerate(ada_boost_models):
    save(model, i+1)

# Load the Saved Models
ada_boost_trained_models = {}

for i in range(no_of_folds):
    ada_boost_trained_models[i] = pickle.load(open('ada_boost_trained_model.pkl', 'rb'))

# Evaluate the Performance of Trained Models
accuracy_list = []

for i in range(no_of_folds):
    print("\nIteration:", i+1)
    print("\n\nTesting Phase for Iteration 0" + str(i+1) + " :")
    print("================================")
    
    model_predictions = ada_boost_trained_models[i].predict(input_testing_data[i])
    model_predictions_data = input_testing_data[i].copy()
    model_predictions_data["Loan_Status"] = output_testing_data[i]
    model_predictions_data["Predictions"] = model_predictions
    
    # Calculate and print the accuracy
    accuracy = accuracy_score(output_testing_data[i], model_predictions)
    print("Accuracy:", accuracy)
    
    accuracy_list.append(accuracy)

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)



Iteration: 1

Accuracy: 0.8586956521739131

Iteration: 2

Accuracy: 0.855072463768116

Iteration: 3

Accuracy: 0.8677536231884058

Iteration: 4

Accuracy: 0.8514492753623188

Iteration: 5

Accuracy: 0.8553345388788427

Iteration: 1


Testing Phase for Iteration 01 :
Accuracy: 0.8870967741935484

Iteration: 2


Testing Phase for Iteration 02 :
Accuracy: 0.8225806451612904

Iteration: 3


Testing Phase for Iteration 03 :
Accuracy: 0.8870967741935484

Iteration: 4


Testing Phase for Iteration 04 :
Accuracy: 0.8870967741935484

Iteration: 5


Testing Phase for Iteration 05 :
Accuracy: 0.8360655737704918


Accuracy Score:

0.84


F1 Score:

0.89


Precision:

0.84


Recall:

0.95


Confusion Matrix:

[[10  8]
 [ 2 41]]


Classification Report:

              precision    recall  f1-score   support

           0       0.83      0.56      0.67        18
           1       0.84      0.95      0.89        43

    accuracy                           0.84        61
   macro avg       0.84      0

# Linear Discriminant Analysis (LDA) by K-Fold

In [42]:
# Import the required libraries
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Train the LDA Model
lda_models = []

for i in range(no_of_folds):
    # Count the number of iterations
    iteration = i + 1
    print("\nIteration:", iteration)
    
    lda_model = LinearDiscriminantAnalysis()
    lda_model.fit(input_training_data[i], np.ravel(output_training_data[i]))
    lda_models.append(lda_model)
    
    # Make predictions on the training data
    y_pred = lda_model.predict(input_training_data[i])
    
    # Calculate the accuracy
    accuracy = accuracy_score(output_training_data[i], y_pred)
    print("\nAccuracy:", accuracy)

# Save the Trained Models
def save(model, iteration):
    pickle.dump(model, open('lda_trained_model.pkl', 'wb'))

for i, model in enumerate(lda_models):
    save(model, i+1)

# Load the Saved Models
lda_trained_models = {}

for i in range(no_of_folds):
    lda_trained_models[i] = pickle.load(open('lda_trained_model.pkl', 'rb'))

# Evaluate the Performance of Trained Models
accuracy_list = []

for i in range(no_of_folds):
    print("\nIteration:", i+1)
    print("\n\nTesting Phase for Iteration 0" + str(i+1) + " :")
    print("================================")
    
    model_predictions = lda_trained_models[i].predict(input_testing_data[i])
    model_predictions_data = input_testing_data[i].copy()
    model_predictions_data["Loan_Status"] = output_testing_data[i]
    model_predictions_data["Predictions"] = model_predictions
    
    # Calculate and print the accuracy
    accuracy = accuracy_score(output_testing_data[i], model_predictions)
    print("Accuracy:", accuracy)
    
    accuracy_list.append(accuracy)

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)



Iteration: 1

Accuracy: 0.822463768115942

Iteration: 2

Accuracy: 0.8315217391304348

Iteration: 3

Accuracy: 0.8297101449275363

Iteration: 4

Accuracy: 0.8260869565217391

Iteration: 5

Accuracy: 0.8264014466546112

Iteration: 1


Testing Phase for Iteration 01 :
Accuracy: 0.8870967741935484

Iteration: 2


Testing Phase for Iteration 02 :
Accuracy: 0.8064516129032258

Iteration: 3


Testing Phase for Iteration 03 :
Accuracy: 0.8225806451612904

Iteration: 4


Testing Phase for Iteration 04 :
Accuracy: 0.8548387096774194

Iteration: 5


Testing Phase for Iteration 05 :
Accuracy: 0.8524590163934426


Accuracy Score:

0.85


F1 Score:

0.9


Precision:

0.84


Recall:

0.98


Confusion Matrix:

[[10  8]
 [ 1 42]]


Classification Report:

              precision    recall  f1-score   support

           0       0.91      0.56      0.69        18
           1       0.84      0.98      0.90        43

    accuracy                           0.85        61
   macro avg       0.87      0.

# Quadratic Discriminant Analysis (QDA) by K-Fold

In [43]:
# Import the required libraries
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

# Train the QDA classifier
qda_models = []

for i in range(no_of_folds):
    # Count the number of iterations
    iteration = i + 1
    print("\nIteration:", iteration)
    
    qda_model = QuadraticDiscriminantAnalysis()
    qda_model.fit(input_training_data[i], np.ravel(output_training_data[i]))
    qda_models.append(qda_model)
    
    # Make predictions on the training data
    y_pred = qda_model.predict(input_training_data[i])
    
    # Calculate the accuracy
    accuracy = accuracy_score(output_training_data[i], y_pred)
    print("\nAccuracy:", accuracy)

# Save the Trained Models
def save(model, iteration):
    pickle.dump(model, open('qda_trained_model.pkl', 'wb'))

for i, model in enumerate(qda_models):
    save(model, i+1)

# Load the Saved Models
qda_trained_models = {}

for i in range(no_of_folds):
    qda_trained_models[i] = pickle.load(open('qda_trained_model.pkl', 'rb'))

# Evaluate the Performance of Trained Models
accuracy_list = []

for i in range(no_of_folds):
    print("\nIteration:", i+1)
    print("\n\nTesting Phase for Iteration 0" + str(i+1) + " :")
    print("================================")
    
    model_predictions = qda_trained_models[i].predict(input_testing_data[i])
    model_predictions_data = input_testing_data[i].copy()
    model_predictions_data["Loan_Status"] = output_testing_data[i]
    model_predictions_data["Predictions"] = model_predictions
    
    # Calculate and print the accuracy
    accuracy = accuracy_score(output_testing_data[i], model_predictions)
    print("Accuracy:", accuracy)
    
    accuracy_list.append(accuracy)

# Calculate the Accuracy Score
model_accuracy_score = accuracy_score(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score, 2))

# Calculate the F1 score
model_f1_score = f1_score(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nF1 Score:")
print("==========\n")
print(round(model_f1_score, 2))

# Calculate Precision
model_precision = precision_score(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nPrecision:")
print("==========\n")
print(round(model_precision, 2))

# Calculate Recall
model_recall = recall_score(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nRecall:")
print("=======\n")
print(round(model_recall, 2))

# Calculate the Confusion Matrix
model_confusion_matrix = confusion_matrix(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nConfusion Matrix:")
print("=================\n")
print(model_confusion_matrix)

# Calculate Classification Report
model_classification_report = classification_report(model_predictions_data["Loan_Status"], model_predictions_data["Predictions"])

print("\n\nClassification Report:")
print("=======================\n")
print(model_classification_report)



Iteration: 1

Accuracy: 0.8134057971014492

Iteration: 2

Accuracy: 0.8260869565217391

Iteration: 3

Accuracy: 0.822463768115942

Iteration: 4

Accuracy: 0.8206521739130435

Iteration: 5

Accuracy: 0.8227848101265823

Iteration: 1


Testing Phase for Iteration 01 :
Accuracy: 0.8870967741935484

Iteration: 2


Testing Phase for Iteration 02 :
Accuracy: 0.8064516129032258

Iteration: 3


Testing Phase for Iteration 03 :
Accuracy: 0.8064516129032258

Iteration: 4


Testing Phase for Iteration 04 :
Accuracy: 0.8064516129032258

Iteration: 5


Testing Phase for Iteration 05 :
Accuracy: 0.8524590163934426


Accuracy Score:

0.85


F1 Score:

0.9


Precision:

0.85


Recall:

0.95


Confusion Matrix:

[[11  7]
 [ 2 41]]


Classification Report:

              precision    recall  f1-score   support

           0       0.85      0.61      0.71        18
           1       0.85      0.95      0.90        43

    accuracy                           0.85        61
   macro avg       0.85      0.

# Applicattion Phase

In [44]:
# Take Input from User

''' 
*---------------- TAKE_USER_INPUT ----------------*
'''

gender_input = input(
    "\nPlease enter your Gender here (Male, Female) : ").strip()
married_input = input(
    "\nPlease enter your Marital Status here (Yes,No) : ").strip()
dependents_input = input(
    "\nPlease enter number of Dependents here (0,1,2,3[3 or More]) : ")
education_input = input(
    "\nPlease enter Education here (Graduate,Not Graduate) : ").strip()
self_employed_input = input(
    "\nPlease enter your Employment Status here (Yes,No) : ").strip()
income_input = input("\nPlease enter your Income here : ")
coapplicant_input = input("\nPlease enter Co-Applicant income here : ")
loan_amount_input = input("\nPlease enter Loan Amount here : ")
loan_amount_term_input = input("\nPlease enter Loan Amount Term here : ")
credit_input = input(
    "\nPlease enter your Credit History here (0,1) : ").strip()
property_area_input = input(
    "\nPlease enter your Property Status here (Urban,Rural,Semiurban) : ").strip()

# Convert User Input into Feature Vector

user_input = pd.DataFrame({'Gender': [gender_input], 'Married': [married_input], 'Dependents': [dependents_input], 'Education': [education_input], 'Self_Employed': [self_employed_input], 'ApplicantIncome': [
                          income_input], 'CoapplicantIncome': [coapplicant_input], 'LoanAmount': [loan_amount_input], 'Loan_Amount_Term': [loan_amount_term_input], 'Credit_History': [credit_input], 'Property_Area': [property_area_input], })

print("\n\nUser Input Feature Vector:")
print("==========================\n")
print(user_input)

# Label Encoding

''' 
*----------------- TRANSFORM_UNSEEN_INTPUT_FEATURES ---------------*
|           Function: Transform()                                  |
|                 Purpose: Transform Input (Categorical) into      |
|                          Numerical Representation                |
|           Arguments:                                             |
|                 Attribute: Target values                         |
|           Return:                                                |
|                 Attribute: Numerical Representation              |
*------------------------------------------------------------------*   
'''

# Transform Input (Categorical) Attributes of Unseen Data into Numerical Representation

unseen_data_features = user_input.copy()
unseen_data_features["Gender"] = gender_label_encoder.transform(
    user_input['Gender'])
unseen_data_features["Married"] = married_label_encoder.transform(
    user_input['Married'])
unseen_data_features["Education"] = education_label_encoder.transform(
    user_input['Education'])
unseen_data_features["Self_Employed"] = self_employed_label_encoder.transform(
    user_input['Self_Employed'])
unseen_data_features["Property_Area"] = property_area_label_encoder.transform(
    user_input['Property_Area'])

print("\n\nUser Input Feature Vector:")
print("==========================\n")
print(user_input)

print("\n\nUser Input Encoded Feature Vector:")
print("==================================\n")
print(unseen_data_features)

# Load the Best Model

''' 
*------------------------ LOAD_BEST_MODEL --------------------------*
|         Function: load()                                          |
|             Purpose: Method to Load Previously Saved Model        |
|         Arguments:                                                |
|               Model: Trained Model                                |
|         Return:                                                   |
|               File: Saved Model will be Loaded in Memory          |
*-------------------------------------------------------------------*
'''

# Load the Best Model
# svc_trained_model_01 has Highest Accuracy

model = pickle.load(open('logistic_trained_model.pkl', 'rb'))


# Load the Saved Model

''' 
*----------------------- LOAD_SAVED_MODEL --------------------------*
|         Function: load()                                          |
|             Purpose: Method to Load Previously Saved Model        |
|         Arguments:                                                |
|               Model: Trained Model                                |
|         Return:                                                   |
|               File: Saved Model will be Loaded in Memory          |
*-------------------------------------------------------------------*
'''

# Load the Saved Model

model = pickle.load(open('logistic_trained_model.pkl', 'rb'))


# Prediction of Unseen Instance

''' 
*----------------------------  MODEL_PREDICTION --------------------------*
|           Function: predict()                                           |
|                 Purpose: Use Trained Model to Predict the Output        |
|                          of Unseen Instances                            |
|           Arguments:                                                    |
|                 User Data: Label Encoded Feature Vector of              |
|                            Unseen Instances                             |
|           Return:                                                       |
|                 Survival: Approved or Not Approved                      |
*-------------------------------------------------------------------------*
'''

# Make a Prediction on Unseen Data

predicted_loan_status = model.predict(unseen_data_features)

if (predicted_loan_status == 1):
    prediction = "Approved"
if (predicted_loan_status == 0):
    prediction = "NOT Approved"

# Add the Prediction in a Pretty Table

pretty_table = PrettyTable()
pretty_table.add_column("       ** Prediction **       ", [prediction])
print(pretty_table)




Please enter your Gender here (Male, Female) : Male

Please enter your Marital Status here (Yes,No) : No

Please enter number of Dependents here (0,1,2,3[3 or More]) : 1

Please enter Education here (Graduate,Not Graduate) : Graduate

Please enter your Employment Status here (Yes,No) : No

Please enter your Income here : 0

Please enter Co-Applicant income here : 0

Please enter Loan Amount here : 2000

Please enter Loan Amount Term here : 360

Please enter your Credit History here (0,1) : 1

Please enter your Property Status here (Urban,Rural,Semiurban) : Urban


User Input Feature Vector:

  Gender Married Dependents Education Self_Employed ApplicantIncome  \
0   Male      No          1  Graduate            No               0   

  CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area  
0                 0       2000              360              1         Urban  


User Input Feature Vector:

  Gender Married Dependents Education Self_Employed ApplicantIncome  