# _Importing Dependencies_

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import re
import string

from transformers import pipeline, TFAutoModelForSequenceClassification, AutoTokenizer
import tensorflow as tf
from kerastuner.tuners import RandomSearch
import keras_tuner

from sklearn.preprocessing import LabelEncoder

from sklearn.preprocessing import RobustScaler, QuantileTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

2022-10-31 21:06:49.763181: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-31 21:06:50.095566: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-10-31 21:06:50.095606: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-10-31 21:06:50.134659: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-10-31 21:06:51.042739: W tensorflow/stream_executor/platform/de

In [2]:
train_final = pd.read_csv('train_final.csv')
test_final = pd.read_csv('test_final.csv')

In [3]:
X = train_final.drop(['patient_id', 'drug_approved_by_UIC', 'base_score', 'date'], axis=1)
y = train_final['base_score']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=14)

# _Scaling the data_

In [5]:
QScaler = QuantileTransformer(output_distribution='normal', n_quantiles=1000)

##### Scaled 3 features

In [6]:
X_train_scaled = X_train.copy()
X_test_scaled = X_test.copy()

In [9]:
X_train_scaled['days'] = QScaler.fit_transform(np.array(X_train['days'].values).reshape(-1,1))
X_test_scaled['days'] = QScaler.transform(np.array(X_test['days'].values).reshape(-1,1))

X_train_scaled['effectivity'] = QScaler.fit_transform(np.array(X_train['effectivity'].values).reshape(-1,1))
X_test_scaled['effectivity'] = QScaler.transform(np.array(X_test['effectivity'].values).reshape(-1,1))


X_train_scaled['number_of_times_prescribed'] = QScaler.fit_transform(np.array(X_train['number_of_times_prescribed'].values).reshape(-1,1))
X_test_scaled['number_of_times_prescribed'] = QScaler.transform(np.array(X_test['number_of_times_prescribed'].values).reshape(-1,1))

X_train_scaled['effectiveness_rating'] = QScaler.fit_transform(np.array(X_train['effectiveness_rating'].values).reshape(-1,1))
X_test_scaled['effectiveness_rating'] = QScaler.transform(np.array(X_test['effectiveness_rating'].values).reshape(-1,1))

# _Neural Network_

In [14]:
# loss function for the model
mse = tf.keras.losses.MeanSquaredError()
mae = tf.keras.metrics.MeanAbsoluteError()

## _Tuning_

'''Building neural network'''
def build_model(hp): 
    inputs = tf.keras.Input(shape=(X_train_scaled.shape[1],))
    
    x = tf.keras.layers.Dense(
            hp.Int('Dense_layers', 64, 128, 16), 
            activation='relu'
        )(inputs)
    
    
    for i in range(hp.Int('nlayers', 1, 5)):
        
        x = tf.keras.layers.Dense(
                units=hp.Int(f'layer_{i+1}', 64, 512, 4),
                activation=(hp.Choice(f'activation_{i+1}', ['relu', 'tanh']))
            )(x)
    

    outputs = tf.keras.layers.Dense(1, activation='relu')(x)


    model = tf.keras.Model(inputs=inputs, outputs=outputs)


    '''Compiling model'''
    model.compile(optimizer='adam', 
                        loss=mse, 
                        metrics=[mae]
                       )



    print(model.summary())

    return model                         

tuner = RandomSearch(build_model,
                     objective=keras_tuner.Objective("val_mean_absolute_error", direction="min"),
                     max_trials=5,
                     overwrite=True,
                    )

tuner.search(x=X_train_scaled, 
             y=y_train.values,
             batch_size=16,
             validation_data=(X_test_scaled, y_test.values))

tuner.results_summary()

# _Optimized Model_

In [15]:
inputs = tf.keras.Input(X_test_scaled.shape[1], )

x = tf.keras.layers.Dense(128, activation='relu')(inputs)
x = tf.keras.layers.Dense(372, activation='relu')(x)
x = tf.keras.layers.Dense(108, activation='tanh')(x)
x = tf.keras.layers.Dense(84, activation='tanh')(x)
x = tf.keras.layers.Dense(128, activation='tanh')(x)
x = tf.keras.layers.Dense(84, activation='tanh')(x)

outputs = tf.keras.layers.Dense(1, activation='relu')(x)


model_final = tf.keras.Model(inputs=inputs, outputs=outputs)

'''Compiling model'''
model_final.compile(optimizer='adam', 
                    loss=mse, 
                    metrics=[mae]
                   )

model_final.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 125)]             0         
                                                                 
 dense_7 (Dense)             (None, 128)               16128     
                                                                 
 dense_8 (Dense)             (None, 372)               47988     
                                                                 
 dense_9 (Dense)             (None, 108)               40284     
                                                                 
 dense_10 (Dense)            (None, 84)                9156      
                                                                 
 dense_11 (Dense)            (None, 128)               10880     
                                                                 
 dense_12 (Dense)            (None, 84)                1083

In [16]:
history = model_final.fit(
    X_train_scaled,
    y_train.values,
    validation_split=0.2,
    batch_size=16,
    epochs=100,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        )
    ]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100


# _Results_

In [17]:
y_pred = model_final.predict(X_test_scaled)
y_pred



array([[6.2476554],
       [4.276334 ],
       [6.2272696],
       ...,
       [7.5737576],
       [6.294425 ],
       [4.509162 ]], dtype=float32)

In [22]:
rmse_score = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
mae_score = np.mean(tf.keras.metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred))

print(f'RSME score : {rmse_score}')
print(f'MAE score :  {mae_score}')

RSME score : 1.0226226704077346
MAE score :  1.5831232070922852
