In [4]:
from cashe import get_cashed_input
from src.data_local import MergedDataPreprocessing
from src.lstm_encoder import LSTMEmbedding
import time
from src.model_train import encode_label
import numpy as np

In [7]:
# Load the cached input and keep only the six columns listed below
# (VISIT_ID first, OUTCOME last — later cells rely on this ordering
# when they slice columns by position).
df_original = get_cashed_input()[
    [
        'VISIT_ID',
        'SERVICE_DESCRIPTION',
        'SERVICE_TYPE',
        'OASIS_IOS_DESCRIPTION',
        'PROVIDER_DEPARTMENT',
        'OUTCOME',
    ]
]

In [8]:
# Wrap the selected columns in the project's preprocessing helper and let it
# produce the train/test partitions.
# NOTE(review): split ratio, shuffling and seeding live inside
# MergedDataPreprocessing.train_test_split — confirm it is deterministic
# if results must be reproducible across kernel restarts.
preprocessing = MergedDataPreprocessing(df_original)
df_train, df_test = preprocessing.train_test_split()

In [9]:
# Skip the first column of df_train (presumably VISIT_ID — verify that
# train_test_split keeps the column order) and keep the rest as a plain list.
train_columns = list(df_train.columns[1:])

In [10]:
# The last entry of train_columns is used as the target; every entry before
# it is used as a feature column.
X_train = df_train[train_columns[:-1]]
y_train = df_train[train_columns[-1]].values

X_test = df_test[train_columns[:-1]]
y_test = df_test[train_columns[-1]].values

# NOTE(review): train and test labels are encoded by two independent calls —
# confirm encode_label uses a fixed mapping so both splits share label ids.
y_train = encode_label(y_train)
y_test = encode_label(y_test)

In [11]:
# Display the training feature frame (pandas truncates the middle rows).
X_train

Unnamed: 0,SERVICE_DESCRIPTION,SERVICE_TYPE,OASIS_IOS_DESCRIPTION,PROVIDER_DEPARTMENT
0,Unlisted Code,services,I.V. INFUSION OBS.,OBSTERTRIC/GYNEACOLOGIC SERVIC - نساء و ولادة
2,In vivo laboratory,laboratory,VITAMIN D (TOTAL),INTERNAL MEDICINE - الباطنة
3,ELICA-M CREAM 30 GM,medication-codes,ELICA-M CREAM 30 GM,OBSTERTRIC/GYNEACOLOGIC SERVIC - نساء و ولادة
4,Peripheral intravenous cannula,medical-devices,INSERTION OF AN I.V. CANNULA,EMERGENCY ROOM SERVICES - طوارى
5,Computerised tomography of brain,imaging,C.T. BRAIN WITHOUT CONTRAST,EMERGENCY ROOM SERVICES - طوارى
...,...,...,...,...
488824,HIV-1 AG W/HIV-1 HIV-2 AB,laboratory,HIV AG- ABS SCREEN (COMBO),CARDIOLOGY SERVICES - القلب
488825,APIDRA SOLOSTAR 100 U- ML DISPOSABLE PEN,medication-codes,APIDRA SOLOSTAR 100 U- ML DISPOSABLE PEN,CARDIOLOGY SERVICES - القلب
488826,TRESIBA FLEXTOUCH 100 U/ML SOLUTION FOR INJECTION,medication-codes,TRESIBA FLEXTOUCH 100 U/ML SOLUTION FOR INJECTION,CARDIOLOGY SERVICES - القلب
488827,Medical bag,medical-devices,Medical bag,CARDIOLOGY SERVICES - القلب


In [12]:
# Create the encoder wrapper and run its preprocess_data step on the training
# features; the result is what the training cell feeds to train_model.
lstm_embedding = LSTMEmbedding()
X_train_processed = lstm_embedding.preprocess_data(X_train)
# Show the processed array (integer rows, zero-filled on the right in the output).
X_train_processed

array([[ 71,  69,   1, ...,   0,   0,   0],
       [ 32, 232,   4, ...,   0,   0,   0],
       [609, 236, 181, ...,   0,   0,   0],
       ...,
       [766, 704,  77, ...,   0,   0,   0],
       [ 31, 210,  31, ...,   0,   0,   0],
       [ 68, 142, 196, ...,   0,   0,   0]])

In [13]:
# Fit the model on the preprocessed training data.
print('Training Model')
lstm_embedding.train_model(
    X_train_processed,
    y_train,
    epochs=20,
    batch_size=32,
)

# NOTE(review): training consumes X_train_processed while evaluation receives
# the raw X_test frame — confirm evaluate_model preprocesses its input itself.
print('Evaluating Model')
accuracy = lstm_embedding.evaluate_model(X_test, y_test)

print('Model Accuracy on test data: ', accuracy)

Training Model
Epoch 1/20
[1m4848/4848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 7ms/step - accuracy: 0.5402 - loss: 0.6857
Epoch 2/20
[1m4848/4848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 6ms/step - accuracy: 0.5738 - loss: 0.6719
Epoch 3/20
[1m4848/4848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 7ms/step - accuracy: 0.5852 - loss: 0.6651
Epoch 4/20
[1m4848/4848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 6ms/step - accuracy: 0.5896 - loss: 0.6614
Epoch 5/20
[1m4848/4848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 6ms/step - accuracy: 0.5961 - loss: 0.6556
Epoch 6/20
[1m4848/4848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 6ms/step - accuracy: 0.6028 - loss: 0.6521
Epoch 7/20
[1m4848/4848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 7ms/step - accuracy: 0.6026 - loss: 0.6527
Epoch 8/20
[1m4848/4848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 7ms/step - accuracy: 0.6094 - loss: 0.6501
E

In [14]:
# Inspect the input_shape attribute of the trained encoder wrapper.
lstm_embedding.input_shape

(37, 1)

In [15]:
# Run embedding_vector on the first three test rows with the object that was
# just trained.
# NOTE(review): reload_model=False presumably reuses the model already held
# in memory — confirm against LSTMEmbedding.embedding_vector.
X_test_vectorized = lstm_embedding.embedding_vector(X_test[:3],reload_model=False)
X_test_vectorized

array([[157,   4, 157, 237, 276, 228,  23,  53,  37,   1,  56,  57,  58,
          0,   0,   0],
       [105,  22,  41,   2,   3, 495, 497, 287, 105,  22,  41,  54,  34,
          1,  38,  59],
       [635, 476,   8,  60,  33,   2,   3, 635, 476,   8,  60,  33,  36,
          1,  39,   0]])

In [16]:
# Time a cold start: build a fresh encoder, load the saved model, and embed
# the first ten test rows with that freshly loaded object.
# perf_counter is used because it is monotonic and high-resolution, which
# matters for sub-second measurements.
start_time = time.perf_counter()
lstm_encoder = LSTMEmbedding()
lstm_encoder.load_model()
# Use lstm_encoder here (not the already-trained lstm_embedding) so the timing
# and the output actually exercise the object that was just loaded.
X_test_vectorized = lstm_encoder.embedding_vector(X_test[:10], reload_model=True)

end_time = time.perf_counter()
execution_time = end_time - start_time
print("Execution time: {:.2f} seconds".format(execution_time))

Execution time: 0.31 seconds


In [17]:
# Display the vectors produced for the first ten test rows.
X_test_vectorized

array([[143, 143, 142, 139, 129, 239, 381,   0,   0,   0,   0,   0,   0,
          0,   0,   0],
       [338, 339, 174,  24, 338, 339, 174,  24, 441,   0,   0,   0,   0,
          0,   0,   0],
       [  6,  27, 132,   6,  27, 132,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0],
       [853,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0],
       [ 96, 107,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0],
       [711,  35,   6,  37, 711,  35,   6,  37,   0,   0,   0,   0,   0,
          0,   0,   0],
       [697, 344, 321, 237, 697, 344, 321, 237, 239, 381,   0,   0,   0,
          0,   0,   0],
       [321, 176, 234, 321, 176, 234, 239, 381,   0,   0,   0,   0,   0,
          0,   0,   0],
       [697, 344, 321, 237, 697, 344, 321, 237, 239, 381,   0,   0,   0,
          0,   0,   0],
       [611,  76, 419, 268, 557, 611,  76, 419, 268, 557, 239, 381,   0,
          0,   0,   0]])

In [18]:
def vectorize(COUNT_VALS):
    """Embed the first ``COUNT_VALS`` rows of ``X_test`` with ``lstm_encoder``.

    Relies on the notebook-level ``lstm_encoder`` and ``X_test`` objects.

    Args:
        COUNT_VALS: Number of leading rows of ``X_test`` passed to the encoder.

    Returns:
        Whatever ``lstm_encoder.embedding_vector`` returns for that slice
        (previously the result was computed and then discarded).
    """
    # NOTE(review): reload_model=True is passed on every call; if that reloads
    # the model from storage, the reload cost is part of each invocation.
    return lstm_encoder.embedding_vector(X_test[:COUNT_VALS], reload_model=True)

def calculate_excecution(count_vals=50, n_runs=10):
    """Estimate the average per-row time of ``vectorize``.

    Calls ``vectorize(count_vals)`` ``n_runs`` times, averages the elapsed
    time of those calls, and divides by ``count_vals``.

    Args:
        count_vals: Number of rows handed to ``vectorize`` on each run.
            Defaults to 50, the value that was previously hard-coded.
        n_runs: Number of timed repetitions. Defaults to 10, the value that
            was previously hard-coded.

    Returns:
        Mean seconds per run divided by ``count_vals`` (a NumPy float).

    Raises:
        ValueError: If ``count_vals`` or ``n_runs`` is not positive, which
            would otherwise produce a division by zero or a nan mean.
    """
    if count_vals <= 0 or n_runs <= 0:
        raise ValueError("count_vals and n_runs must be positive")

    durations = []
    for _ in range(n_runs):
        # perf_counter is monotonic and high-resolution, unlike time.time(),
        # so short intervals are measured reliably.
        start_time = time.perf_counter()
        vectorize(count_vals)
        durations.append(time.perf_counter() - start_time)

    # NOTE(review): the division assumes vectorize really processed
    # count_vals rows, i.e. the test set has at least that many rows.
    return np.mean(durations) / count_vals

In [19]:
# Run the benchmark; the displayed value is the average seconds per row.
calculate_excecution()

0.001926307201385498