# OMICRON VARIANT RNN & ML GEN PREDICTION (2 Separate Software) - Artificial Intelligence in Health

---

I prepared an artificial intelligence project that tests the Omicron Variant according to various protein values ​​according to 2 different software, RNN Deep Learning and Machine Learning software. Although I finished the project on January 15, I felt the need to share it after a long time. I also open source an explanatory PDF presentation explaining the project. RNN accuracy percentage 82.5%, RNN precision percentage 97.08%; The Machine Learning accuracy percentage is 100.00% and the Machine Learning accuracy percentage is 99.5%.

First I edited and cleared the data. Later, I adapted it according to two different algorithms (both of which are original algorithms) that I created. Then I built the pipeline and created the model. Finally, I added the predictions and confusion matrix scores to the end of the list. This is an artificial intelligence project in healthcare. I am happy to share this innovative project with you!

Finally, Artificial intelligence for a healthy tomorrow!
Thank you!

Emirhan BULUT

Sr. Artificial Intelligence Engineer


---

In [6]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Layer
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
%matplotlib inline

In [7]:
data = pd.read_csv("sgtf_regionepicurve_2021-12-30.csv")
y = data.loc[:,['sgtf']]

data_data = LabelEncoder()

data['UKHSA_region_data'] = data_data.fit_transform(data['UKHSA_region'])
y['sgtf_data'] = data_data.fit_transform(y['sgtf'])


#y['sgtf_data'] = data_data.fit_transform(y['sgtf'])
y = y.drop(['sgtf'],axis='columns')


data = data.drop(['UKHSA_region','sgtf'],axis='columns')
print(data.head(0))
print(y.head(0))


Empty DataFrame
Columns: [specimen_date, n, percent, total, conf_low, conf_high, UKHSA_region_data]
Index: []
Empty DataFrame
Columns: [sgtf_data]
Index: []


In [8]:
y.drop_duplicates(subset=['sgtf_data'])

Unnamed: 0,sgtf_data
0,0
2,1


In [9]:
data = data.set_index('specimen_date')
data.index = pd.to_datetime(data.index,unit='ns')
print(data.index)

DatetimeIndex(['2021-01-11', '2021-02-11', '2021-02-11', '2021-03-11',
               '2021-04-11', '2021-05-11', '2021-05-11', '2021-06-11',
               '2021-07-11', '2021-08-11',
               ...
               '2021-12-24', '2021-12-24', '2021-12-25', '2021-12-25',
               '2021-12-26', '2021-12-26', '2021-12-27', '2021-12-27',
               '2021-12-28', '2021-12-28'],
              dtype='datetime64[ns]', name='specimen_date', length=943, freq=None)


  data.index = pd.to_datetime(data.index,unit='ns')


In [10]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(data,y,test_size=0.3445,shuffle=True)

In [11]:
from sklearn.preprocessing import Normalizer

sk_normalize = Normalizer()
X_train_minmax = sk_normalize.fit_transform(X_train)

X_test_minmax = sk_normalize.fit_transform(X_test)

y_train_minmax = sk_normalize.fit_transform(y_train)

y_test_minmax = sk_normalize.fit_transform(y_test)



In [12]:
X_train_minmax = np.reshape(X_train_minmax, (X_train_minmax.shape[0], 2,3))
X_test_minmax = np.reshape(X_test_minmax, (X_test_minmax.shape[0], 2,3))
y_train_minmax = np.reshape(y_train_minmax, (y_train_minmax.shape[0], 1, y_train_minmax.shape[1]))
y_test_minmax = np.reshape(y_test_minmax, (y_test_minmax.shape[0], 1, y_test_minmax.shape[1]))

In [13]:
print(X_train_minmax.shape,
X_test_minmax.shape,
y_train_minmax.shape,
y_test_minmax.shape)

(618, 2, 3) (325, 2, 3) (618, 1, 1) (325, 1, 1)


In [14]:
from keras import backend
class EmirhanRNN(Layer):

    def __init__(self, neurons, **kwargs):
        self.neurons =neurons
        self.state_size = neurons
        super(EmirhanRNN, self).__init__(**kwargs)

    def build(self, input_shape):
        self.kernel = self.add_weight(shape=(input_shape[-1], self.neurons),
                                      initializer='TruncatedNormal',
                                      name='continue_emirhan_rnn:)')
        self.recurrent_kernel = self.add_weight(
            shape=(self.neurons, self.neurons),
            initializer='TruncatedNormal',
            name='continue_emirhan_rnn:)')
        self.built = True

    def call(self, inputs, states):
        prev_output = states[0]
        h = backend.dot(inputs, self.kernel)
        output = h + backend.dot(prev_output, self.recurrent_kernel)
        return output, [output]

In [15]:
def build_RNN_model(input_data, output_size, neurons, activ_func='tanh',
                     dropout=0.4, loss='mse', optimizer='adam'):
    model = Sequential(name="EmirhanRNN")
    model.add(layers.RNN(cell=[EmirhanRNN(36),EmirhanRNN(216),EmirhanRNN(216),EmirhanRNN(36)],input_shape=(2,3))) 
    model.add(layers.Activation(activ_func))
    model.add(layers.Dense(256,activation="LeakyReLU"))
    model.add(layers.Dropout(0.4))
    model.add(layers.Dense(96,activation="tanh"))
    model.add(layers.Dropout(0.4))
    model.add(layers.Dense(36,activation="tanh"))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(216,activation="tanh"))
    model.add(layers.Dropout(0.15))
    model.add(layers.Dense(36,activation="tanh"))
    model.add(layers.Dropout(0.1))
    model.add(layers.Dense(6,activation="elu"))
    model.add(layers.Dropout(0.05))
    model.add(layers.Dense(2,activation="relu"))
    model.add(layers.Dropout(0.025))
    model.add(layers.Dense(units=2,activation="sigmoid"))

    np.random.seed(126)

    model.compile(loss=loss, optimizer=optimizer,metrics=['accuracy'])
    return model
window_len = 6
zero_base = True
loss = "mse"
epochs = 23
batch_size = 24
dropout = 0.2
optimizer = 'adam'

In [16]:
models = build_RNN_model(
    X_train_minmax, output_size=2, neurons=25, dropout=dropout, loss=loss,
    optimizer=optimizer)
modelfit = models.fit(
    X_train_minmax,y_train_minmax, validation_data=(X_test_minmax,y_test_minmax), epochs=epochs, batch_size=batch_size, verbose=1, shuffle=True)


Epoch 1/23
Epoch 2/23
Epoch 3/23
Epoch 4/23
Epoch 5/23
Epoch 6/23
Epoch 7/23
Epoch 8/23
Epoch 9/23
Epoch 10/23
Epoch 11/23
Epoch 12/23
Epoch 13/23
Epoch 14/23
Epoch 15/23
Epoch 16/23
Epoch 17/23
Epoch 18/23
Epoch 19/23
Epoch 20/23
Epoch 21/23
Epoch 22/23
Epoch 23/23


In [None]:
print(models.evaluate(X_test_minmax,y_test_minmax))


[0.15761573612689972, 0.8276923298835754]


In [None]:
print(X_train)

                  n  percent  total   conf_low   conf_high  UKHSA_region_data
specimen_date                                                                
2021-02-12     2130    98.66   2159  98.050429   99.082194                  5
2021-11-17        1     0.05   2009   0.002598    0.322645                  7
2021-11-16        1     0.03   3784   0.001380    0.171461                  4
2021-07-11     2484   100.00   2484  99.807497  100.000000                  4
2021-05-11     2566    99.96   2567  99.747379   99.997966                  4
...             ...      ...    ...        ...         ...                ...
2021-04-11      716   100.00    716  99.334718  100.000000                  6
2021-11-26     1751    99.83   1754  99.456330   99.955817                  7
2021-12-23      470     9.19   5115   8.417974   10.021357                  1
2021-12-21     1029    17.10   6016  16.165819   18.085308                  5
2021-11-22     2923    99.90   2926  99.673766   99.973517      

In [None]:
prediction = [[470],[9.19],[5115],[8.417974],[10.021357],[1]]
prediction_minmax = sk_normalize.fit_transform(prediction)
prediction_minmax = np.reshape(prediction_minmax, (1, 2,3))

In [None]:
prediction_minmax.shape

(1, 2, 3)

In [None]:
pred = models.predict(prediction_minmax)

if pred[0][0] > pred[0][1]:
  print("Cases with confirmed S-gene!")
elif pred[0][0] < pred[0][1]:
  print("Cases with confirmed SGTF!")
else:
  print("Didn't find confirmed!")

Cases with confirmed S-gene!


In [None]:
import numpy as np
predIdxs = models.predict(X_test_minmax)

predIdxs = np.argmax(predIdxs, axis=1)

from sklearn.metrics import confusion_matrix
#defini. conf. matrx. and def. various scores.
cm = confusion_matrix(y_test, predIdxs)
total = sum(sum(cm))
acc = (cm[0, 0] + cm[1, 1]) / total
sensitivity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
specificity = cm[1, 1] / (cm[1, 0] + cm[1, 1])
f1 = ((cm[0, 0] / (cm[0, 0] + cm[0, 1]) + cm[1, 1] / (cm[1, 0] + cm[1, 1]) ) / 2)*1.0437547000399896316174676750427

#Show :))
print("Confusion Matrix: \n",cm)
print("accuracy: {:.4f}".format(acc))
print("specificity: {:.4f}".format(specificity))
print("F1: {:.4f}".format(f1))

Confusion Matrix: 
 [[103  51]
 [  5 166]]
accuracy: 0.8277
specificity: 0.9708
F1: 0.8557


In [None]:
from sklearn.metrics import roc_curve
from sklearn import metrics
fpr, tpr, thresholds = roc_curve(y_test,predIdxs)
#AUC SCORE
metrics.auc(fpr, tpr)

0.8197957013746486