In [1]:
import pandas as pd
import numpy as np
from numpy import array
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification

from collections import Counter
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import ADASYN
from imblearn.metrics import classification_report_imbalanced
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

In [2]:
namefile = "Covid_filterinG_500.csv"
df = pd.read_csv("dataset/dataset/"+namefile)

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 19 columns):
 #   Column                Non-Null Count  Dtype
---  ------                --------------  -----
 0   SEX                   500 non-null    int64
 1   AGE                   500 non-null    int64
 2   PREGNANT              500 non-null    int64
 3   PATIENT_TYPE          500 non-null    int64
 4   DATE_DIED             500 non-null    int64
 5   INTUBED               500 non-null    int64
 6   PNEUMONIA             500 non-null    int64
 7   DIABETES              500 non-null    int64
 8   COPD                  500 non-null    int64
 9   ASTHMA                500 non-null    int64
 10  INMSUPR               500 non-null    int64
 11  HIPERTENSION          500 non-null    int64
 12  CARDIOVASCULAR        500 non-null    int64
 13  OBESITY               500 non-null    int64
 14  RENAL_CHRONIC         500 non-null    int64
 15  ICU                   500 non-null    int64
 16  TOBACCO 

In [4]:
X= df.loc[:,['SEX', 'AGE', 'PREGNANT', 'PATIENT_TYPE', 'DATE_DIED', 'INTUBED', 'PNEUMONIA', 'DIABETES', 'COPD', 'ASTHMA', 'INMSUPR', 'HIPERTENSION', 'CARDIOVASCULAR', 'OBESITY', 'RENAL_CHRONIC', 'ICU', 'TOBACCO', 'OTHER_DISEASE']].values
y= df.loc[:, ['CLASIFFICATION_FINAL']].values

y = np.array(y).flatten()

In [5]:
smote = SMOTE(random_state=50)
adasyn = ADASYN(random_state=50)
X_res_smote, y_res_smote = smote.fit_resample(X, y)
X_res_adasyn, y_res_adasyn = adasyn.fit_resample(X, y)

In [6]:
reps = [
    ('Oriset', y, X),
    ('Smote', y_res_smote, X_res_smote),
    ('Adasyn', y_res_adasyn, X_res_adasyn)
]

In [7]:
resamcount = []
for rep in reps:
    count = Counter(rep[1])
    resamcount.append([rep[0], count])

In [8]:
tss = [
    ('TS_1',0.1),
    ('TS_2',0.2),
    ('TS_3',0.3)
]

In [9]:
models = [
    ('SVM', SVC()),
    ('RF', RandomForestClassifier()),
    ('NN', MLPClassifier())
]

In [10]:
test_trains = []
for ts in tss:
    for rep in reps:
        resultTT = X_train, X_test, y_train, y_test = train_test_split(
            rep[2], rep[1], test_size=ts[1], random_state=50)
        test_trains.append([rep[0], ts[0], resultTT])

In [11]:
model_trains = []
for model in models:
    for test_train in test_trains:
        result_model_train = model[1].fit(test_train[2][0], test_train[2][2])
        model_trains.append([model[0],test_train[0], test_train[1], test_train[2][1], test_train[2][3], result_model_train])



In [12]:
model_tok = []
for model in model_trains:
    model_tok.append([model[0]+"_"+model[1]+"_"+model[2], model[5]])

In [13]:
display(model_tok)

[['SVM_Oriset_TS_1', SVC()],
 ['SVM_Smote_TS_1', SVC()],
 ['SVM_Adasyn_TS_1', SVC()],
 ['SVM_Oriset_TS_2', SVC()],
 ['SVM_Smote_TS_2', SVC()],
 ['SVM_Adasyn_TS_2', SVC()],
 ['SVM_Oriset_TS_3', SVC()],
 ['SVM_Smote_TS_3', SVC()],
 ['SVM_Adasyn_TS_3', SVC()],
 ['RF_Oriset_TS_1', RandomForestClassifier()],
 ['RF_Smote_TS_1', RandomForestClassifier()],
 ['RF_Adasyn_TS_1', RandomForestClassifier()],
 ['RF_Oriset_TS_2', RandomForestClassifier()],
 ['RF_Smote_TS_2', RandomForestClassifier()],
 ['RF_Adasyn_TS_2', RandomForestClassifier()],
 ['RF_Oriset_TS_3', RandomForestClassifier()],
 ['RF_Smote_TS_3', RandomForestClassifier()],
 ['RF_Adasyn_TS_3', RandomForestClassifier()],
 ['NN_Oriset_TS_1', MLPClassifier()],
 ['NN_Smote_TS_1', MLPClassifier()],
 ['NN_Adasyn_TS_1', MLPClassifier()],
 ['NN_Oriset_TS_2', MLPClassifier()],
 ['NN_Smote_TS_2', MLPClassifier()],
 ['NN_Adasyn_TS_2', MLPClassifier()],
 ['NN_Oriset_TS_3', MLPClassifier()],
 ['NN_Smote_TS_3', MLPClassifier()],
 ['NN_Adasyn_TS_3', M

In [14]:
preds = []
for model_train in model_trains:
    result_predi = model_train[5].predict(model_train[3])
    preds.append([model_train[0], model_train[1], model_train[2], model_train[4], result_predi])

In [15]:
result_full = []
for pred in preds:
    acr = round(accuracy_score(pred[3], pred[4])*100, 2)
    report = classification_report_imbalanced(pred[3], pred[4], output_dict=True )

    pre = round(report['avg_pre']*100, 2)
    rec = round(report['avg_rec']*100, 2)
    spe = round(report['avg_spe']*100, 2)
    f1 = round(report['avg_f1']*100, 2)
    geo = round(report['avg_geo']*100, 2)
    iba = round(report['avg_iba']*100, 2)
    sup = round(report['total_support']*100, 2)

    result_full.append([pred[0], pred[1], pred[2], acr, pre, rec, spe, f1, geo, iba, sup])


In [16]:
display(result_full)

[['SVM', 'Oriset', 'TS_1', 82.0, 83.2, 82.0, 82.17, 82.3, 82.09, 67.37, 5000],
 ['SVM',
  'Smote',
  'TS_1',
  80.88,
  81.88,
  80.88,
  82.1,
  80.94,
  81.33,
  66.06,
  6800],
 ['SVM',
  'Adasyn',
  'TS_1',
  65.28,
  67.99,
  65.28,
  67.85,
  65.06,
  65.55,
  42.86,
  7200],
 ['SVM',
  'Oriset',
  'TS_2',
  77.0,
  77.58,
  77.0,
  73.89,
  77.22,
  75.29,
  56.86,
  10000],
 ['SVM',
  'Smote',
  'TS_2',
  79.41,
  79.85,
  79.41,
  79.89,
  79.48,
  79.62,
  63.37,
  13600],
 ['SVM',
  'Adasyn',
  'TS_2',
  66.67,
  68.43,
  66.67,
  68.43,
  66.72,
  67.18,
  45.05,
  14400],
 ['SVM',
  'Oriset',
  'TS_3',
  78.0,
  78.35,
  78.0,
  74.41,
  78.15,
  76.0,
  57.96,
  15000],
 ['SVM',
  'Smote',
  'TS_3',
  77.94,
  77.95,
  77.94,
  77.94,
  77.94,
  77.94,
  60.75,
  20400],
 ['SVM',
  'Adasyn',
  'TS_3',
  70.37,
  73.21,
  70.37,
  73.25,
  70.41,
  71.15,
  50.48,
  21600],
 ['RF', 'Oriset', 'TS_1', 84.0, 84.67, 84.0, 83.2, 84.19, 83.59, 69.93, 5000],
 ['RF',
  'Smote',
  

In [17]:
# model = pickle.load(open("model_training/"+namefile+".pkl", 'rb'))
# label = pickle.load(open("label_list/"+namefile+".pkl", 'rb'))

In [18]:
# display(model)

In [19]:
# grub = []
# grub.append([resamcount, result_full])
# pickle.dump(grub, open(
#     f"/home/fais/Desktop/Repo/ML_Covid_cls/result/"+namefile+".pkl", "wb"))

In [20]:
# for items in data:
#         for ite in items[2]:
#             display(ite[0]+ite[1]+ite[2])

In [21]:
# datas = [
#     pickle.load(open("result/Covid_filterinG_500.csv.pkl", 'rb')),
#     pickle.load(open("result/Covid_filterinG_1500.csv.pkl", 'rb'))
# ]


# for data in datas:
#     display(data)
    
    