In [None]:
# Environment setup: each line is a shell command run through IPython's `!` escape.
# Remove any installed tensorflow package without prompting (--yes).
!pip uninstall tensorflow --yes
# Install TensorFlow Decision Forests.
# NOTE(review): presumably this re-installs a TensorFlow build matching the
# package after the uninstall above -- confirm.
!pip install tensorflow_decision_forests
# Install the pinned cuDNN package (8.1.0.77 built for CUDA 11.2) via apt,
# allowing held packages to be changed and answering yes to prompts.
!apt install --allow-change-held-packages libcudnn8=8.1.0.77-1+cuda11.2 --yes
# Install kerassurgeon, which the imports cell uses for delete_layer/insert_layer.
!pip install kerassurgeon

In [None]:
import os
import cv2
import gc
import numpy as np
import pandas as pd
import glob
import pathlib
import tensorflow as tf
import tensorflow_addons as tfa
import seaborn as sns
import matplotlib.pyplot as plt
import keras
from keras.preprocessing.image import ImageDataGenerator
#from keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras import datasets, layers, models,Input,Model 
import tensorflow_datasets as tfds
from keras.models import Sequential
from keras.layers import  Bidirectional, Conv2D, BatchNormalization, MaxPooling2D, Flatten, LSTM, Dense, Lambda, Dropout,Reshape
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from tensorflow.keras.metrics import Accuracy, Recall,Precision
from sklearn.tree import DecisionTreeClassifier as Decisiontree
from sklearn.svm import SVC as Supportvectorclassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
import time
from functools import reduce
from kerassurgeon.operations import delete_layer, insert_layer
from keras.utils import to_categorical
import pickle

In [None]:
# Load the saved Keras model and the saved train/test tf.data datasets from
# the notebook's input directory.
model = keras.models.load_model(
    '../input/datafromoriginalmodel/my_model'
)
train_ds = tf.data.Dataset.load(
    "../input/datafromoriginalmodel/train_ds"
)
test_ds = tf.data.Dataset.load(
    "../input/datafromoriginalmodel/test_ds"
)

# Extracting features
This section selects only the feature-extraction part of our model. We will use it to extract features for use in our other models.

In [None]:
# feature extraction model

# Sub-model sharing the loaded model's inputs whose output is the output of
# the layer named "flatten_layer"; predict() on it returns that layer's values.
feature_extractor = Model(
    name="Feature_extractor",
    inputs=model.inputs,
    outputs=model.get_layer(name="flatten_layer").output,
)
#feature_extractor.summary() 

Here we extract the features and their corresponding labels.

In [None]:
# getting training set features and labels

# Training feature matrix: output of the feature extractor run over every
# batch of the training dataset.
features = feature_extractor.predict(train_ds)

# labels
def labels(tfdata):
    """Gather the label part of every batch into a single NumPy array.

    Args:
        tfdata: Iterable of two-element batches ``(images, labels)`` whose
            label element exposes ``.numpy()`` (such as the tf.data datasets
            loaded earlier in this notebook).

    Returns:
        The per-batch label arrays joined with ``np.concatenate``, in
        iteration order.
    """
    per_batch = [batch_labels.numpy() for _, batch_labels in tfdata]
    return np.concatenate(per_batch)

def two_cat_to_one(labels):
    """Reduce each label row to its first component.

    Args:
        labels: Iterable of indexable label rows (e.g. two-column rows).

    Returns:
        A Python list holding ``row[0]`` for every row, in input order.

    NOTE(review): if the rows are one-hot encoded, the returned value is 1
    for class index 0 and 0 for class index 1 -- confirm this matches the
    label meaning assumed by the metric and plotting cells.
    """
    return [row[0] for row in labels]

# Training labels: first component of every label row in the training set.
# NOTE(review): features come from one pass over the dataset (predict) and
# labels from a separate pass (labels()); this assumes the dataset yields
# batches in the same order on every iteration -- confirm.
labs = two_cat_to_one(labels(train_ds))

# Test-set feature matrix.
features_test = feature_extractor.predict(test_ds)

# Test-set labels, reduced the same way as the training labels.
lab_test = two_cat_to_one(labels(test_ds))

# Logistic regression

The binary logistic regression approach in this study assumes a linear relationship between the log-odds of COVID-19 infection and the features extracted from X-ray images (Bonetto & Latzko, 2020). The relationship is modeled by the following function:

$\log\left(\frac{p}{1-p}\right) = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \cdots + \beta_n x_n$
 
 
where:

$\beta_0, \ldots, \beta_n$ are the regression coefficients,

$x_1, \ldots, x_n$ are the observed features,

$p$ is the probability of contracting COVID-19, and

$\frac{p}{1-p}$ is the odds of contracting COVID-19.

**Fitting the logistic regression model**

In [None]:
# Fit a logistic regression (scikit-learn default settings) on the extracted
# training features; fit() returns the fitted estimator.
logistic = LogisticRegression()
logistic_model = logistic.fit(features, labs)

In [None]:
# Time prediction over the whole test set and report seconds per sample.
# perf_counter() is the high-resolution clock intended for measuring short
# intervals; time.time() can have coarse resolution and may jump.
st = time.perf_counter()
pred = logistic_model.predict(features_test)
et = time.perf_counter()
# NOTE(review): rounding to 4 decimals reports 0.0 for any per-sample time
# below 0.00005 s -- confirm this precision is sufficient.
elapsed_time = round((et - st) / len(lab_test), 4)

cm = metrics.confusion_matrix(lab_test, pred)
# metrics.plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed
# in 1.2. ConfusionMatrixDisplay.from_predictions is its replacement and
# reuses the predictions computed above instead of predicting a second time.
metrics.ConfusionMatrixDisplay.from_predictions(
    lab_test,
    pred,
    display_labels=['COVID', 'Normal'],
    cmap="YlOrBr",
)

In [None]:
# Test-set metric summary for the logistic regression model.
# Sensitivity, precision and F1 treat label 0 as the positive class;
# specificity is computed as the recall of label 1.
# NOTE(review): lab_test holds the first component of each label row --
# confirm that value 0 is the class intended as "positive" here.
logist = pd.DataFrame(
    {
        'Measure': [
            'Accuracy',
            'Sensitivity',
            'Specitivity',
            'Precision',
            'F1_score',
            'Excecution time',
        ],
        'Logistic regression': [
            metrics.accuracy_score(lab_test, pred),
            metrics.recall_score(lab_test, pred, pos_label=0),
            metrics.recall_score(lab_test, pred, pos_label=1),
            metrics.precision_score(lab_test, pred, pos_label=0),
            metrics.f1_score(lab_test, pred, pos_label=0),
            elapsed_time,  # seconds per test sample, from the timing cell
        ],
    }
)
logist


In [None]:
# Serialize the fitted logistic regression model to logistic.pkl in the
# current working directory.
with pathlib.Path("logistic.pkl").open("wb") as file:
    pickle.dump(logistic_model, file)
    
#with open("original_hist.pkl","rb") as file:
#    hist=pickle.load(file)


# Decision tree
The decision tree is among the most commonly utilized machine learning techniques as far as data classification and regression are concerned. It presumes that predictors can be combined and are independent to generate a probability of one class over the other. It uses heuristic processes to determine the best cutoffs and the order in which to use them in making decisions as error rates are identified. It can precisely classify every element in a data set, given that all feature values are unique. R or the rpart classification-tree software can be used. Recursive tree partitioning is the most popular decision tree method and will be used to determine the trends and patterns in the given data. The grouping of training data also helps in lowering the residual sum of squares. Besides, the guidelines for data estimation and model development are preserved for use in future datasets. The model entails the use of control features known as hyperparameters to control the number of tree patterns obtained from the guidelines and so minimize complexity.

We will need to tune the hyperparameters to ensure optimal performance. Here we choose the set of values to try.

In [None]:
# Hyper-parameter grid explored by the decision tree grid search.
parameters = {
    "splitter": ["best", "random"],
    # Roughly five candidate depths spread evenly from 1 up to the number of
    # training rows. max(1, ...) keeps the range step non-zero when the
    # training set is very small (a zero step makes range() raise ValueError).
    "max_depth": list(
        range(
            1,
            features.shape[0] - 1,
            max(1, round((features.shape[0] - 1) / 5)),
        )
    ),
    "min_samples_leaf": [2, 5, 8, 11],
    "min_weight_fraction_leaf": [0.1, 0.2, 0.3, 0.5],
    # "auto" is omitted: for classifiers it was an alias of "sqrt", it was
    # deprecated in scikit-learn 1.1 and removed in 1.3 (where it raises an
    # error). "sqrt" is listed first so it takes the position "auto" had.
    "max_features": ["sqrt", "log2", None],
    "max_leaf_nodes": [None, 10, 40, 70],
    "random_state": [0],
}

Here we evaluate the model performance while testing prediction speed.

In [None]:
# Grid search with 3-fold cross-validation over `parameters`.
# GridSearchCV clones the estimator it receives, so the base tree does not
# need to be fitted beforehand (doing so only costs an extra full fit).
np.random.seed(100)
tuned = GridSearchCV(Decisiontree(random_state=0), param_grid=parameters, cv=3)
tuned.fit(features, labs)
# With the default refit=True, best_estimator_ is the base estimator with
# best_params_ applied, already refit on the full training data -- the same
# model a manual Decisiontree(**best_params_).fit(...) would produce.
DT = tuned.best_estimator_
tuned.best_params_

In [None]:
# Time prediction over the whole test set and report seconds per sample.
# perf_counter() is the high-resolution clock intended for measuring short
# intervals; time.time() can have coarse resolution and may jump.
st = time.perf_counter()
pred = DT.predict(features_test)
et = time.perf_counter()
# NOTE(review): rounding to 4 decimals reports 0.0 for any per-sample time
# below 0.00005 s -- confirm this precision is sufficient.
elapsed_time = round((et - st) / len(lab_test), 4)
cm = metrics.confusion_matrix(lab_test, pred)
# metrics.plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed
# in 1.2. ConfusionMatrixDisplay.from_predictions is its replacement and
# reuses the predictions computed above instead of predicting a second time.
metrics.ConfusionMatrixDisplay.from_predictions(
    lab_test,
    pred,
    display_labels=['COVID', 'Normal'],
    cmap="YlOrBr",
)

In [None]:
# Test-set metric summary for the tuned decision tree.
# Sensitivity, precision and F1 treat label 0 as the positive class;
# specificity is computed as the recall of label 1.
# NOTE(review): lab_test holds the first component of each label row --
# confirm that value 0 is the class intended as "positive" here.
Decision = pd.DataFrame(
    {
        'Measure': [
            'Accuracy',
            'Sensitivity',
            'Specitivity',
            'Precision',
            'F1_score',
            'Excecution time',
        ],
        'Decision tree': [
            metrics.accuracy_score(lab_test, pred),
            metrics.recall_score(lab_test, pred, pos_label=0),
            metrics.recall_score(lab_test, pred, pos_label=1),
            metrics.precision_score(lab_test, pred, pos_label=0),
            metrics.f1_score(lab_test, pred, pos_label=0),
            elapsed_time,  # seconds per test sample, from the timing cell
        ],
    }
)
Decision

In [None]:
# Serialize the fitted decision tree to DT.pkl in the current working
# directory.
with pathlib.Path("DT.pkl").open("wb") as file:
    pickle.dump(DT, file)
    
#with open("original_hist.pkl","rb") as file:
#    hist=pickle.load(file)