In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV
from imblearn.over_sampling import SMOTE, RandomOverSampler
from collections import Counter
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit, cross_val_score, GridSearchCV, cross_validate

# importing two different imputation methods that take into consideration all the features when predicting the missing values
from sklearn.impute import KNNImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.impute import SimpleImputer

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

#multiclass imports
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from sklearn.dummy import DummyClassifier #Will identify the maority calss base line, model needs to do better then the baseline

from statistics import mean
# to reduce randomness then you put the seed
np.random.seed(42)

from ArtificialImmuneSystem import *
from imblearn.metrics import geometric_mean_score

In [2]:
df = pd.read_csv('Data\GeneratedSyntheticData-testing.csv')
#df = df.drop('Unnamed: 0', axis=1)

In [3]:
print(f"Data shape: \n{df.shape}\n")
print(f"Data size: \n{df.size}\n")
print(f"Data ndim: \n{df.ndim}\n")
print("_____________________________________________\n")
print(f"Old Class Distribution: {Counter(df['5'])}")

Data shape: 
(300, 6)

Data size: 
1800

Data ndim: 
2

_____________________________________________

Old Class Distribution: Counter({0.0: 247, 1.0: 53})


In [4]:
#Split the dataset into a train set = 80% and test = 20%
data_train, data_test = train_test_split(df, test_size=0.2, random_state=42)

#Print the shape of the train and test set
print(f"Train Data shape: \n{data_train}\n")
print(f"Test Data shape: \n{data_test}\n")

data_train_AIS = data_train.copy()
data_train_SMOTE = data_train.copy()

Train Data shape: 
            0         1         2         3         4    5
232 -0.019817  0.058038 -1.455309  0.023516  0.054193  0.0
59   0.055881 -0.163657  0.666498 -0.066311 -0.152815  0.0
6    0.041093 -0.120351  1.869881 -0.048764 -0.112377  0.0
185  0.148608 -0.435229 -0.695417 -0.176347 -0.406396  1.0
173 -0.013564  0.039726  0.225626  0.016096  0.037094  1.0
..        ...       ...       ...       ...       ...  ...
188  0.078593 -0.230175 -0.700165 -0.093263 -0.214926  0.0
71  -0.016761  0.049087 -1.858915  0.019889  0.045835  1.0
106  0.047683 -0.139649 -0.473959 -0.056583 -0.130397  0.0
270  0.147648 -0.432416 -0.864888 -0.175207 -0.403769  0.0
102 -0.004327  0.012672  0.695998  0.005134  0.011832  0.0

[240 rows x 6 columns]

Test Data shape: 
            0         1         2         3         4    5
203 -0.321374  0.941208  0.287581  0.381360  0.878854  1.0
266  0.264000 -0.773176  0.919191 -0.313277 -0.721955  1.0
152  0.027106 -0.079385  1.432811 -0.032165 -0.074126

In [5]:
#Create an oversampling object
oversample = SMOTE()
oversample_AIS = ArtificialImmuneSystem()
#Oversample and add to the dataframe to fix the class imbalance
randomForest = RandomForestClassifier()
x_over, y_over = oversample.fit_resample(data_train_SMOTE.drop(["5"], axis=1), data_train_SMOTE.drop(data_train_SMOTE.columns[0:-1],axis=1))
input_x_over_AIS, y_over_AIS = oversample_AIS.AIS_Resample(data_train_AIS.drop(["5"], axis=1), data_train_AIS.drop(data_train_AIS.columns[0:-1],axis=1), 20, 5, randomForest,5,'balanced_accuracy' )

smote_df = pd.concat([x_over, y_over], axis=1)
ais_df = pd.concat([input_x_over_AIS, y_over_AIS], axis=1)

# print the dimensionality of the oversampled dataset
print(f"SMOTE Oversampled Data shape: \n{smote_df.shape}\n")
print(f"SMOTE Oversampled Data size: \n{smote_df.size}\n")
print(f"SMOTE Oversampled Data ndim: \n{smote_df.ndim}\n")
print("_____________________________________________\n")

# print the dimensionality of the oversampled dataset
print(f"AIS Oversampled Data shape: \n{ais_df.shape}\n")
print(f"AIS Oversampled Data size: \n{ais_df.size}\n")
print(f"AIS Oversampled Data ndim: \n{ais_df.ndim}\n")
print("_____________________________________________\n")


# print the new class distribution using a Counter
print(f"New SMOTE Class Distribution: {Counter(smote_df['5'])}")
print(f"New AIS Class Distribution: {Counter(ais_df['5'])}")
# print the new class distribution using a Counter
print(f"Old Class Distribution: {Counter(data_train['5'])}")

print("_____________________________________________\n")


origin_feat_train before:  (192, 5)
origin_labels_train before:  Counter({0.0: 156, 1.0: 36})
origin_feat_train after:  (240, 5)
population_features:  (154, 5)
origin_labels_train after:  Counter({0.0: 194, 1.0: 46})
origin_feat_train before:  (192, 5)
origin_labels_train before:  Counter({0.0: 155, 1.0: 37})
origin_feat_train after:  (240, 5)
population_features:  (154, 5)
origin_labels_train after:  Counter({0.0: 191, 1.0: 49})
score1: 0.6795085470085469
score2: 0.5965743686138423
origin_feat_train before:  (192, 5)
origin_labels_train before:  Counter({0.0: 159, 1.0: 33})
origin_feat_train after:  (240, 5)
population_features:  (154, 5)
origin_labels_train after:  Counter({0.0: 199, 1.0: 41})
score1: 0.6795085470085469
score2: 0.5618727106227106
origin_feat_train before:  (192, 5)
origin_labels_train before:  Counter({0.0: 158, 1.0: 34})
origin_feat_train after:  (240, 5)
population_features:  (154, 5)
origin_labels_train after:  Counter({0.0: 199, 1.0: 41})
score1: 0.67950854700854

In [None]:
#aisOversample = ArtificialImmuneSystem()
#minority_class = df[df['5'] == 1]
#majority_class = df[df['5'] == 0]

#requiredPopulation = len(majority_class)-len(minority_class)
#population = aisOversample.AIS(minority_class, max_rounds=100, totalPopulation=requiredPopulation)


In [None]:
#Extracting Labels
#Get a list of all columns
#columns = data_train.columns.to_list()
#Remove the label and save it
#columns_drop = columns.pop(-1)

#Remove all labels except for the label in the train and test dataframe
#labels_train = data_train.drop(columns, axis=1)
#labels_test = data_test.drop(columns, axis=1)

#Print the labesl of the test and train
#print(f"labels_train: \n{labels_train}\n")
#print(f"labels_test: \n{labels_test}\n")

#Remove the label from the train and test dataframe
#features_train = data_train.drop(['5'], axis=1)
#features_test = data_test.drop(['5'], axis=1)

#Print the features of the train and test dataset
#print(f"features_train: \n{features_train }\n")
#print(f"lfeatures_test: \n{features_test }\n")

In [None]:
#labelTrainFlat = labels_train.values.ravel()

#Fit one vs rest Gradient Boosting classification
gradientBoosting = GradientBoostingClassifier()
gradientBoosting = gradientBoosting.fit(x_over, y_over.values.ravel())

gradientBoosting_AIS = GradientBoostingClassifier()
gradientBoosting_AIS = gradientBoosting.fit(input_x_over_AIS, y_over_AIS.values.ravel())

#Fit RandomForestClassifier classification
randomForest = RandomForestClassifier()
randomForest = randomForest.fit(x_over,y_over.values.ravel())

randomForest = RandomForestClassifier()
randomForest_AIS  = randomForest.fit(input_x_over_AIS,y_over_AIS.values.ravel())

#Create a KNeighbors classification object
kNeighbors = KNeighborsClassifier()
kNeighbors = kNeighbors.fit(x_over,y_over.values.ravel())

kNeighbors = KNeighborsClassifier()
kNeighbors_AIS  = kNeighbors.fit(input_x_over_AIS,y_over_AIS.values.ravel())

#Create an LogisticRegression object
logisticRegression = LogisticRegression(max_iter=5000)
logisticRegression = logisticRegression.fit(x_over,y_over.values.ravel())

logisticRegression = LogisticRegression(max_iter=5000)
logisticRegression_AIS  = logisticRegression.fit(input_x_over_AIS,y_over_AIS.values.ravel())

In [None]:
#Set the parameters of GradientBoosting for GridSearchCV
parametersGradientBoosting = [
    {'learning_rate': [0.44,0.45,0.46],'min_samples_leaf': [5,6,7],'min_samples_split': [7,8,9,10], 'n_estimators': [57,58,59,60]}
]

#Set the scoring parameters
scoringX = {"roc_auc": "roc_auc", "bal_accuracy": "balanced_accuracy"}

#Preform Gridsearch to find best parameters
grid_searchGradientBoosting = GridSearchCV(gradientBoosting, parametersGradientBoosting, cv=4, scoring = scoringX, return_train_score=True, n_jobs=-1, refit='bal_accuracy')
grid_searchGradientBoosting_AIS = GridSearchCV(gradientBoosting, parametersGradientBoosting, cv=4, scoring = scoringX, return_train_score=True, n_jobs=-1, refit='bal_accuracy')

#Fit the GradientBoosting 
grid_searchGradientBoosting.fit(x_over, y_over.values.ravel())
grid_searchGradientBoosting_AIS.fit(input_x_over_AIS,y_over_AIS.values.ravel())

#Print GridSearchCV Results
print(f"Best parameters GradientBoosting: \n{grid_searchGradientBoosting.best_params_}\n")
print(f"Best estimator GradientBoosting: \n{grid_searchGradientBoosting.best_estimator_}\n")
print(f"Best score GradientBoosting: \n{grid_searchGradientBoosting.best_score_}\n")
print("_____________________________________________\n")
print(f"Best parameters GradientBoosting AIS: \n{grid_searchGradientBoosting_AIS.best_params_}\n")
print(f"Best estimator GradientBoosting AIS: \n{grid_searchGradientBoosting_AIS.best_estimator_}\n")
print(f"Best score GradientBoosting AIS: \n{grid_searchGradientBoosting_AIS.best_score_}\n")

KeyboardInterrupt: 

In [None]:


#Set the parameters of KNeighbors for GridSearchCV
parametersKNeighbors = [
    {'n_neighbors': [1,2,3],'weights':['uniform', 'distance'],'algorithm':['auto'], 'p': [1,2,3]}
]

#Set the scoring parameters
scoringX = {"roc_auc": "roc_auc", "bal_accuracy": "balanced_accuracy"}

#Preform KNeighbors to find best parameters
grid_searchKNeighbors = GridSearchCV(kNeighbors, parametersKNeighbors, cv=4, scoring = scoringX, return_train_score=True, n_jobs=-1, refit='bal_accuracy')
grid_searchKNeighbors_AIS = GridSearchCV(kNeighbors, parametersKNeighbors, cv=4, scoring = scoringX, return_train_score=True, n_jobs=-1, refit='bal_accuracy')

#Fit the KNeighbors 
grid_searchKNeighbors.fit(x_over, y_over.values.ravel())
grid_searchKNeighbors_AIS.fit(input_x_over_AIS,y_over_AIS.values.ravel())

#Print GridSearchCV Results
print(f"Best parameters KNeighbors: \n{grid_searchKNeighbors.best_params_}\n")
print(f"Best estimator KNeighbors: \n{grid_searchKNeighbors.best_estimator_}\n")
print(f"Best score KNeighbors: \n{grid_searchKNeighbors.best_score_}\n")
print("_____________________________________________\n")
print(f"Best parameters KNeighbors AIS: \n{grid_searchKNeighbors_AIS.best_params_}\n")
print(f"Best estimator KNeighbors AIS: \n{grid_searchKNeighbors_AIS.best_estimator_}\n")
print(f"Best score KNeighbors AIS: \n{grid_searchKNeighbors_AIS.best_score_}\n")

Best parameters KNeighbors: 
{'algorithm': 'auto', 'n_neighbors': 3, 'p': 2, 'weights': 'distance'}

Best estimator KNeighbors: 
KNeighborsClassifier(n_neighbors=3, weights='distance')

Best score KNeighbors: 
0.7614285714285715

_____________________________________________

Best parameters KNeighbors: 
{'algorithm': 'auto', 'n_neighbors': 2, 'p': 3, 'weights': 'uniform'}

Best estimator KNeighbors: 
KNeighborsClassifier(n_neighbors=2, p=3)

Best score KNeighbors: 
0.865



In [None]:


#Set the parameters of LogisticRegression for GridSearchCV
parametersLogisticRegression = [
    {'multi_class': ['ovr'],'penalty':['none','l2'], 'C': [1,2,3]}
]
scoringX = {"roc_auc": "roc_auc", "bal_accuracy": "balanced_accuracy"}

#Preform LogisticRegression to find best parameters
grid_searchLogisticRegression = GridSearchCV(logisticRegression, parametersLogisticRegression, cv=4, scoring = scoringX, return_train_score=True, n_jobs=-1, refit='bal_accuracy')
grid_searchLogisticRegression_AIS = GridSearchCV(logisticRegression, parametersLogisticRegression, cv=4, scoring = scoringX, return_train_score=True, n_jobs=-1, refit='bal_accuracy')

#Fit the LogisticRegression 
grid_searchLogisticRegression.fit(x_over, y_over.values.ravel())
grid_searchLogisticRegression_AIS.fit(input_x_over_AIS,y_over_AIS.values.ravel())

#Print LogisticRegression Results
print(f"Best parameters Logistic Regression: \n{grid_searchLogisticRegression.best_params_}\n")
print(f"Best estimator Logistic Regression: \n{grid_searchLogisticRegression.best_estimator_}\n")
print(f"Best score Logistic Regression: \n{grid_searchLogisticRegression.best_score_}\n")
print("_____________________________________________\n")
print(f"Best parameters Logistic Regression AIS: \n{grid_searchLogisticRegression_AIS.best_params_}\n")
print(f"Best estimator Logistic Regression AIS: \n{grid_searchLogisticRegression_AIS.best_estimator_}\n")
print(f"Best score Logistic Regression AIS: \n{grid_searchLogisticRegression_AIS.best_score_}\n")

Best parameters Logistic Regression: 
{'C': 1, 'multi_class': 'ovr', 'penalty': 'l2'}

Best estimator Logistic Regression: 
LogisticRegression(C=1, max_iter=5000, multi_class='ovr')

Best score Logistic Regression: 
0.5760204081632654

_____________________________________________

Best parameters Logistic Regression AIS: 
{'C': 1, 'multi_class': 'ovr', 'penalty': 'none'}

Best estimator Logistic Regression AIS: 
LogisticRegression(C=1, max_iter=5000, multi_class='ovr', penalty='none')

Best score Logistic Regression AIS: 
0.7356632653061225



In [None]:
#Set the parameters of RandomForest for GridSearchCV
parametersRandomForest = [
    {'n_estimators': [145,150,155,190],'max_depth': [10,12], 'bootstrap': [True, False],
     'min_samples_split': [0.05,2], 'max_features': ['auto']}
]

#Preform Gridsearch to find best parameters
grid_searchRandomForest = GridSearchCV(randomForest, parametersRandomForest, cv=4, scoring = scoringX, return_train_score=True, n_jobs=-1, refit='bal_accuracy')
grid_searchRandomForest_AIS = GridSearchCV(randomForest, parametersRandomForest, cv=4, scoring = scoringX, return_train_score=True, n_jobs=-1, refit='bal_accuracy')

#Fit the RandomForest 
grid_searchRandomForest.fit(x_over, y_over.values.ravel())
grid_searchRandomForest_AIS.fit(input_x_over_AIS,y_over_AIS.values.ravel())

#Print GridSearchCV Results
print(f"Best parameters RandomForest: \n{grid_searchRandomForest.best_params_}\n")
print(f"Best estimator RandomForest: \n{grid_searchRandomForest.best_estimator_}\n")
print(f"Best score RandomForest: \n{grid_searchRandomForest.best_score_}\n")
print("_____________________________________________\n")
print(f"Best parameters RandomForest AIS: \n{grid_searchRandomForest_AIS.best_params_}\n")
print(f"Best estimator RandomForest AIS: \n{grid_searchRandomForest_AIS.best_estimator_}\n")
print(f"Best score RandomForest AIS: \n{grid_searchRandomForest_AIS.best_score_}\n")

Best parameters RandomForest: 
{'bootstrap': False, 'max_depth': 10, 'max_features': 'auto', 'min_samples_split': 0.05, 'n_estimators': 190}

Best estimator RandomForest: 
RandomForestClassifier(bootstrap=False, max_depth=10, min_samples_split=0.05,
                       n_estimators=190)

Best score RandomForest: 
0.746326530612245

_____________________________________________

Best parameters RandomForest AIS: 
{'bootstrap': True, 'max_depth': 10, 'max_features': 'auto', 'min_samples_split': 0.05, 'n_estimators': 150}

Best estimator RandomForest AIS: 
RandomForestClassifier(max_depth=10, min_samples_split=0.05, n_estimators=150)

Best score RandomForest AIS: 
0.8981122448979592



In [None]:
#Get the results for all classifiers 
cross_val_resultsGB = grid_searchGradientBoosting.cv_results_
cross_val_resultsRF = grid_searchRandomForest.cv_results_
cross_val_resultsLR = grid_searchLogisticRegression.cv_results_
cross_val_resultsKN = grid_searchKNeighbors.cv_results_

cross_val_resultsGB_AIS = grid_searchGradientBoosting_AIS.cv_results_
cross_val_resultsRF_AIS = grid_searchRandomForest_AIS.cv_results_
cross_val_resultsLR_AIS = grid_searchLogisticRegression_AIS.cv_results_
cross_val_resultsKN_AIS = grid_searchKNeighbors_AIS.cv_results_


#Print the results of all classiifiers
#GBC
print(f"Mean Test Accuracy for Gradient Boosting: \n{mean(cross_val_resultsGB['mean_test_roc_auc'])}\n")
print(f"Balanced Test Accuracy for Gradient Boosting: \n{mean(cross_val_resultsGB['mean_test_bal_accuracy'])}\n")
print("_____________________________________________\n")
print(f"Mean Test Accuracy for Gradient Boosting AIS: \n{mean(cross_val_resultsGB_AIS['mean_test_roc_auc'])}\n")
print(f"Balanced Test Accuracy for Gradient Boosting AIS: \n{mean(cross_val_resultsGB_AIS['mean_test_bal_accuracy'])}\n")
print("\n_____________________________________________\n")
#RFC
print(f"Mean Test Accuracy for Random Forests: \n{mean(cross_val_resultsRF['mean_test_roc_auc'])}\n")
print(f"Balanced Test Accuracy for Random Forests: \n{mean(cross_val_resultsRF['mean_test_bal_accuracy'])}\n")
print("_____________________________________________\n")
print(f"Mean Test Accuracy for Random Forests AIS: \n{mean(cross_val_resultsRF_AIS['mean_test_roc_auc'])}\n")
print(f"Balanced Test Accuracy for Random Forests AIS: \n{mean(cross_val_resultsRF_AIS['mean_test_bal_accuracy'])}\n")
print("\n_____________________________________________\n")
#LRC
print(f"Mean Test Accuracy for Logistic Regression: \n{mean(cross_val_resultsLR['mean_test_roc_auc'])}\n")
print(f"Balanced Test Accuracy for Logistic Regression: \n{mean(cross_val_resultsLR['mean_test_bal_accuracy'])}\n")
print("_____________________________________________\n")
print(f"Mean Test Accuracy for Logistic Regression AIS: \n{mean(cross_val_resultsLR_AIS['mean_test_roc_auc'])}\n")
print(f"Balanced Test Accuracy for Logistic Regression AIS: \n{mean(cross_val_resultsLR_AIS['mean_test_bal_accuracy'])}\n")
print("\n_____________________________________________\n")

#KNC
print(f"Mean Test Accuracy for K Nearest Neighbours: \n{mean(cross_val_resultsKN['mean_test_roc_auc'])}\n")
print(f"Balanced Test Accuracy for K Nearest Neighbours: \n{mean(cross_val_resultsKN['mean_test_bal_accuracy'])}\n")
print("_____________________________________________\n")
print(f"Mean Test Accuracy for K Nearest Neighbours AIS: \n{mean(cross_val_resultsKN_AIS['mean_test_roc_auc'])}\n")
print(f"Balanced Test Accuracy for K Nearest Neighbours AIS: \n{mean(cross_val_resultsKN_AIS['mean_test_bal_accuracy'])}\n")
print("\n_____________________________________________\n")

Mean Test Accuracy for Gradient Boosting: 
0.8266026814637419

Balanced Test Accuracy for Gradient Boosting: 
0.7436337868480726

_____________________________________________

Mean Test Accuracy for Gradient Boosting AIS: 
0.9116964141098616

Balanced Test Accuracy for Gradient Boosting AIS: 
0.8522293083900226


_____________________________________________

Mean Test Accuracy for Random Forests: 
0.7946871095376926

Balanced Test Accuracy for Random Forests: 
0.7229607780612245

_____________________________________________

Mean Test Accuracy for Random Forests AIS: 
0.9183122917534361

Balanced Test Accuracy for Random Forests AIS: 
0.8727551020408163


_____________________________________________

Mean Test Accuracy for Logistic Regression: 
0.5824836873524919

Balanced Test Accuracy for Logistic Regression: 
0.5726190476190476

_____________________________________________

Mean Test Accuracy for Logistic Regression AIS: 
0.6732719005969734

Balanced Test Accuracy for Logistic 

In [None]:
predictions_test_over_GB = grid_searchGradientBoosting.best_estimator_.predict(data_test.drop(["5"],axis=1))
predictions_test_over_RF = grid_searchRandomForest.best_estimator_.predict(data_test.drop(["5"],axis=1))
predictions_test_over_LR = grid_searchLogisticRegression.best_estimator_.predict(data_test.drop(["5"],axis=1))
predictions_test_over_KN = grid_searchKNeighbors.best_estimator_.predict(data_test.drop(["5"],axis=1))

predictions_test_over_GB_AIS = grid_searchGradientBoosting_AIS.best_estimator_.predict(data_test.drop(["5"],axis=1))
predictions_test_over_RF_AIS = grid_searchRandomForest_AIS.best_estimator_.predict(data_test.drop(["5"],axis=1))
predictions_test_over_LR_AIS = grid_searchLogisticRegression_AIS.best_estimator_.predict(data_test.drop(["5"],axis=1))
predictions_test_over_KN_AIS = grid_searchKNeighbors_AIS.best_estimator_.predict(data_test.drop(["5"],axis=1))

TN, FP, FN, TP = confusion_matrix(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_GB).ravel()
print("Gradient Boosting")
print('True Positive(TP)  = ', TP)
print('False Positive(FP) = ', FP)
print('True Negative(TN)  = ', TN)
print('False Negative(FN) = ', FN)
print("__________________________________")

TN, FP, FN, TP = confusion_matrix(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_RF).ravel()
print("Random Forest")
print('True Positive(TP)  = ', TP)
print('False Positive(FP) = ', FP)
print('True Negative(TN)  = ', TN)
print('False Negative(FN) = ', FN)
print("__________________________________")

TN, FP, FN, TP = confusion_matrix(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_LR).ravel()
print("Linear Regresion")
print('True Positive(TP)  = ', TP)
print('False Positive(FP) = ', FP)
print('True Negative(TN)  = ', TN)
print('False Negative(FN) = ', FN)
print("__________________________________")

TN, FP, FN, TP = confusion_matrix(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_KN).ravel()
print("K Neighbors")
print('True Positive(TP)  = ', TP)
print('False Positive(FP) = ', FP)
print('True Negative(TN)  = ', TN)
print('False Negative(FN) = ', FN)
print("__________________________________")

TN, FP, FN, TP = confusion_matrix(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_GB_AIS).ravel()
print("Gradient Boosting AIS")
print('True Positive(TP)  = ', TP)
print('False Positive(FP) = ', FP)
print('True Negative(TN)  = ', TN)
print('False Negative(FN) = ', FN)
print("__________________________________")

TN, FP, FN, TP = confusion_matrix(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_RF_AIS).ravel()
print("Random Forest AIS")
print('True Positive(TP)  = ', TP)
print('False Positive(FP) = ', FP)
print('True Negative(TN)  = ', TN)
print('False Negative(FN) = ', FN)
print("__________________________________")

TN, FP, FN, TP = confusion_matrix(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_LR_AIS).ravel()
print("Linear Regresion AIS")
print('True Positive(TP)  = ', TP)
print('False Positive(FP) = ', FP)
print('True Negative(TN)  = ', TN)
print('False Negative(FN) = ', FN)
print("__________________________________")

TN, FP, FN, TP = confusion_matrix(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_KN_AIS).ravel()
print("K Neighbors AIS")
print('True Positive(TP)  = ', TP)
print('False Positive(FP) = ', FP)
print('True Negative(TN)  = ', TN)
print('False Negative(FN) = ', FN)
print("__________________________________")


print(f"Geometric Mean Score for Gradient Boosting: \n{geometric_mean_score(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_GB, labels=None, pos_label=1, average='binary',)}\n")
print(f"Geometric Mean Score for Random Forest: \n{geometric_mean_score(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_RF, labels=None, pos_label=1, average='binary',)}\n")
print(f"Geometric Mean Score for Logestic Regression: \n{geometric_mean_score(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_LR, labels=None, pos_label=1, average='binary',)}\n")
print(f"Geometric Mean Score for K Neighbors: \n{geometric_mean_score(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_KN, labels=None, pos_label=1, average='binary',)}\n")

print(f"Geometric Mean Score for Gradient Boosting AIS: \n{geometric_mean_score(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_GB_AIS, labels=None, pos_label=1, average='binary',)}\n")
print(f"Geometric Mean Score for Random Forest AIS: \n{geometric_mean_score(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_RF_AIS, labels=None, pos_label=1, average='binary',)}\n")
print(f"Geometric Mean Score for Logestic Regression AIS: \n{geometric_mean_score(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_LR_AIS, labels=None, pos_label=1, average='binary',)}\n")
print(f"Geometric Mean Score for K Neighbors AIS: \n{geometric_mean_score(data_test.drop(data_test.columns[0:-1],axis=1), predictions_test_over_KN_AIS, labels=None, pos_label=1, average='binary',)}\n")

Gradient Boosting
True Positive(TP)  =  6
False Positive(FP) =  11
True Negative(TN)  =  39
False Negative(FN) =  4
__________________________________
Random Forest
True Positive(TP)  =  8
False Positive(FP) =  9
True Negative(TN)  =  41
False Negative(FN) =  2
__________________________________
Linear Regresion
True Positive(TP)  =  6
False Positive(FP) =  22
True Negative(TN)  =  28
False Negative(FN) =  4
__________________________________
K Neighbors
True Positive(TP)  =  4
False Positive(FP) =  9
True Negative(TN)  =  41
False Negative(FN) =  6
__________________________________
Gradient Boosting AIS
True Positive(TP)  =  8
False Positive(FP) =  3
True Negative(TN)  =  47
False Negative(FN) =  2
__________________________________
Random Forest AIS
True Positive(TP)  =  6
False Positive(FP) =  0
True Negative(TN)  =  50
False Negative(FN) =  4
__________________________________
Linear Regresion AIS
True Positive(TP)  =  5
False Positive(FP) =  6
True Negative(TN)  =  44
False Negat