<a href="https://colab.research.google.com/github/SMBH-1/tbd/blob/main/DL_Project_Age_And_Side_Effects.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from random import randint
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler


# Parallel lists filled by the data-generation cell: ages and their 0/1 side-effect labels
train_labels, train_samples = [], []

In [None]:
# Synthetic training data. Label 1 = experienced side effects, 0 = did not.
# NOTE: this cell appends to train_samples/train_labels, so re-running it without
# re-running the cell that initialises those lists adds a second batch of data.

# Minority groups that go AGAINST the overall trend: 50 people per age band
# (50 is 5% of the 1000 majority samples generated per age band in the next loop).
for i in range(50):

  #A YOUNGER individual (13-64) who DID experience side effects
  random_younger = randint(13,64)
  train_samples.append(random_younger)
  train_labels.append(1)

  #An OLDER individual (65-100) who DID NOT experience side effects
  random_older = randint(65,100)
  train_samples.append(random_older)
  train_labels.append(0)

# Majority groups that FOLLOW the overall trend: 1000 people per age band.
for i in range(1000):
  #A YOUNGER individual (13-64) who DID NOT experience side effects
  random_younger = randint(13,64)
  train_samples.append(random_younger)
  train_labels.append(0)

  #An OLDER individual (65-100) who DID experience side effects
  random_older = randint(65,100)
  train_samples.append(random_older)
  train_labels.append(1)

# Show a bounded summary instead of printing every generated age on its own line.
print(len(train_samples), 'samples generated; first 10:', train_samples[:10])

In [None]:
# Preview the labels instead of printing every one of them on its own line.
print(len(train_labels), 'labels; first 10:', train_labels[:10])

In [None]:
#Turn both lists into NumPy arrays (the format passed to model.fit) and shuffle them
#together, so each age stays paired with its label while the generation order is removed
train_labels, train_samples = shuffle(np.array(train_labels), np.array(train_samples))

In [None]:
#Map the ages onto the 0-1 range; the scaler learns its min/max from the training ages.
#reshape(-1,1) turns the 1-D age vector into a single-column 2-D array for the scaler.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(train_samples.reshape(-1, 1))
scaled_train_samples = scaler.transform(train_samples.reshape(-1, 1))

In [None]:
# Sanity-check the scaling with a bounded preview instead of printing every row.
print('shape:', scaled_train_samples.shape)
print('min:', scaled_train_samples.min(), 'max:', scaled_train_samples.max())
print(scaled_train_samples[:10])

1.   Generated some raw data
2.   Converted it to NumPy arrays, the format our model expects
3.   Scaled the data to the range 0-1

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy

In [None]:
# Detect GPUs and enable on-demand memory allocation on each one.
# Iterating (rather than indexing [0]) keeps this cell from raising IndexError
# when the notebook runs on a machine without a GPU.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print('Num GPUs Available: ', len(physical_devices))
for gpu in physical_devices:
  tf.config.experimental.set_memory_growth(gpu, True)

Num GPUs Available:  1


In [None]:
#Build the network as a Keras Sequential model: a plain stack of fully connected layers

model = Sequential()
model.add(Dense(16, input_shape=(1,), activation='relu'))  #1st hidden layer, 16 units; input_shape=(1,) means one feature per sample
model.add(Dense(128, activation='relu'))                   #2nd hidden layer, 128 units
model.add(Dense(64, activation='relu'))                    #3rd hidden layer, 64 units
model.add(Dense(32, activation='relu'))                    #4th hidden layer, 32 units
model.add(Dense(2, activation='softmax'))                  #output layer: softmax probabilities over the 2 classes (no side effects / side effects)

In [None]:
# Print the layer-by-layer table of output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 16)                32        
                                                                 
 dense_1 (Dense)             (None, 128)               2176      
                                                                 
 dense_2 (Dense)             (None, 64)                8256      
                                                                 
 dense_3 (Dense)             (None, 32)                2080      
                                                                 
 dense_4 (Dense)             (None, 2)                 66        
                                                                 
Total params: 12,610
Trainable params: 12,610
Non-trainable params: 0
_________________________________________________________________


In [None]:
#Compile the model before training: choose the optimizer, the loss and the metric to report.
#The sparse categorical loss takes the integer 0/1 labels directly (no one-hot encoding).

model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 5 epochs, holding out 10% of the training data for validation
model.fit(
    x=scaled_train_samples,
    y=train_labels,
    validation_split=0.1,
    batch_size=60,
    epochs=5,
    shuffle=True,
    verbose=2,
)

Epoch 1/5
63/63 - 0s - loss: 0.1958 - accuracy: 0.9476 - val_loss: 0.2169 - val_accuracy: 0.9452 - 191ms/epoch - 3ms/step
Epoch 2/5
63/63 - 0s - loss: 0.1957 - accuracy: 0.9505 - val_loss: 0.2205 - val_accuracy: 0.9357 - 160ms/epoch - 3ms/step
Epoch 3/5
63/63 - 0s - loss: 0.1960 - accuracy: 0.9492 - val_loss: 0.2183 - val_accuracy: 0.9452 - 179ms/epoch - 3ms/step
Epoch 4/5
63/63 - 0s - loss: 0.1966 - accuracy: 0.9508 - val_loss: 0.2221 - val_accuracy: 0.9357 - 164ms/epoch - 3ms/step
Epoch 5/5
63/63 - 0s - loss: 0.1962 - accuracy: 0.9489 - val_loss: 0.2165 - val_accuracy: 0.9452 - 169ms/epoch - 3ms/step


<keras.callbacks.History at 0x7fae0612e280>

In [None]:
#Build the test set with the same age bands and label rule as the training set
#(label 1 = experienced side effects, 0 = did not)

test_samples, test_labels = [], []

#Minority groups that go against the trend: 10 pairs of
#(younger 13-64 WITH side effects, older 65-100 WITHOUT side effects)
for pair_idx in range(10):
  test_samples += [randint(13,64), randint(65,100)]
  test_labels += [1, 0]

#Majority groups that follow the trend: 1000 pairs of
#(younger 13-64 WITHOUT side effects, older 65-100 WITH side effects)
for pair_idx in range(1000):
  test_samples += [randint(13,64), randint(65,100)]
  test_labels += [0, 1]

#Both lists must have the same length (one label per sample)
print(len(test_labels))
print(len(test_samples))

2020
2020


In [None]:
# Convert to NumPy arrays and shuffle samples and labels together (pairs stay aligned)
test_labels = np.array(test_labels)
test_samples = np.array(test_samples)
test_labels, test_samples = shuffle(test_labels, test_samples)

# Reuse the scaler that was fitted on the training ages: transform only.
# Calling fit_transform here would refit the scaler on the test data, so test
# inputs could be scaled differently from what the model was trained on.
scaled_test_samples = scaler.transform(test_samples.reshape(-1,1))

In [None]:
#Predict on the held-out TEST samples. These predictions are later compared with
#test_labels, so they must come from scaled_test_samples, not the training set.

predictions = model.predict(x=scaled_test_samples, batch_size=10, verbose=0)

# One row per test sample, one softmax probability per class; show a bounded preview
print(predictions.shape)
print(predictions[:10])

In [None]:
# Pick the most probable class index (0 or 1) for every sample
rounded_predictions = np.argmax(predictions, axis=-1)

# Summarise instead of printing every prediction on its own line
print('predicted class counts [0, 1]:', np.bincount(rounded_predictions, minlength=2))
print('first 10:', rounded_predictions[:10])

In [None]:
%matplotlib inline
from sklearn.metrics import confusion_matrix
import itertools
import matplotlib.pyplot as plt

In [None]:
# Flatten to 1-D without hard-coding the sample count, so this keeps working
# if the test-set size changes.
rounded_predictions = rounded_predictions.reshape(-1)
cm = confusion_matrix(y_true=test_labels, y_pred=rounded_predictions) #called from sklearn

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
  """Print a confusion matrix and draw it as an annotated heat map.

  Draws on the current matplotlib figure via the pyplot interface.

  Args:
    cm: 2-D NumPy array; rows are labelled 'True label' and columns
      'Predicted label' on the plot.
    classes: sequence of tick labels, one per class, in class-index order.
    normalize: if True, each row is divided by its row sum before printing
      and plotting, and the cell annotations are shown with two decimals.
    title: title placed above the plot.
    cmap: matplotlib colormap used for the heat map.
  """
  # Normalise BEFORE drawing so the heat map, the printed matrix, the cell
  # annotations and the text-colour threshold all refer to the same values.
  if normalize:
    cm = cm.astype('float')/cm.sum(axis=1)[:, np.newaxis]
    print('Normalized confusion matrix')
  else:
    print('Confusion matrix, without normalization')

  print(cm)

  plt.imshow(cm, interpolation='nearest', cmap=cmap)
  plt.title(title)
  plt.colorbar()
  tick_marks = np.arange(len(classes))
  plt.xticks(tick_marks, classes, rotation=45)
  plt.yticks(tick_marks, classes)

  # Cells above half of the maximum value get white text, the rest black.
  thresh = cm.max()/2
  for i,j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    # Keep normalised fractions short; leave raw values exactly as given.
    cell_text = format(cm[i,j], '.2f') if normalize else cm[i,j]
    plt.text(j, i, cell_text,
             horizontalalignment='center',
             color='white' if cm[i,j] > thresh else 'black')

  plt.tight_layout()
  plt.ylabel('True label')
  plt.xlabel('Predicted label')

In [None]:
# Tick labels in class-index order: 0 = no side effects, 1 = had side effects
cm_plot_labels = ['no_side_effects', 'had_side_effects']
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Confusion Matrix')