# Data Preparation and Processing

In [1]:
import numpy as np
from random import randint
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Accumulators for the synthetic dataset: one age per entry in train_samples,
# and the matching side-effect flag (1 = yes, 0 = no) in train_labels.
train_samples, train_labels = [], []

Example data:
 - An experimental drug was tested on individuals ranging in age from 13 to 100 in a clinical trial.
 - The trial had 2100 participants. Half of the participants were under 65 years old, and the other half were 65 years of age or older.
 - Around 95% of patients 65 or older experienced side effects from the drug.
 - Around 95% of patients under 65 experienced no side effects.

In [3]:
# Build the synthetic trial data. Labels: 1 = experienced side effects, 0 = did not.
# randint(a, b) is inclusive on BOTH ends, so the two age groups are
# 13..64 ("under 65") and 65..100 ("65 or older") with no overlap.

# Outliers: 50 participants per age group whose outcome goes against the trend.
for i in range(50):
    # The ~5% of younger individuals who did experience side effects
    random_younger = randint(13, 64)
    train_samples.append(random_younger)
    train_labels.append(1)

    # The ~5% of older individuals who did not experience side effects
    # (lower bound is 65, not 64, so no 64-year-old lands in the older group)
    random_older = randint(65, 100)
    train_samples.append(random_older)
    train_labels.append(0)

# Majority: 1000 participants per age group who follow the trend.
for i in range(1000):
    # The ~95% of younger individuals who did not experience side effects
    random_younger = randint(13, 64)
    train_samples.append(random_younger)
    train_labels.append(0)

    # The ~95% of older individuals who did experience side effects
    random_older = randint(65, 100)
    train_samples.append(random_older)
    train_labels.append(1)


In [5]:
# Dump the raw labels first, then the raw ages, each on its own line.
for raw_list in (train_labels, train_samples):
    print(raw_list)

[1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 

Convert both lists into NumPy arrays, since that is the format the fit() function expects, and then shuffle the arrays to remove any order that was imposed on the data during the creation process.

In [6]:
# Convert both lists to numpy arrays and shuffle them together, so every label
# stays aligned with its sample while the generation order is removed.
train_labels, train_samples = shuffle(
    np.array(train_labels),
    np.array(train_samples),
)

In [11]:
# Preview the first 25 shuffled labels and, on the next line, their ages.
print(train_labels[:25], train_samples[:25], sep="\n")

[1 1 1 1 1 0 1 1 0 0 1 0 1 1 0 0 0 1 0 0 1 1 1 0 1]
[99 80 76 65 87 29 94 79 20 27 83 33 66 96 47 40 52 85 19 30 73 73 67 18
 65]


In [15]:
# Rescale the ages into the [0, 1] range. The 1-D age array is first reshaped
# into a single-column 2-D array before being passed to the scaler.
scaler = MinMaxScaler(feature_range=(0, 1))
age_column = train_samples.reshape(-1, 1)
scaled_train_samples = scaler.fit_transform(age_column)

In [24]:
# Preview the first ten scaled ages (one value per row).
print(scaled_train_samples[0:10])

[[0.98850575]
 [0.77011494]
 [0.72413793]
 [0.59770115]
 [0.85057471]
 [0.18390805]
 [0.93103448]
 [0.75862069]
 [0.08045977]
 [0.16091954]]


# Simple tf.keras Sequential Model

In [25]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy




In [28]:
# Small fully connected classifier: one scaled age in, two class probabilities out.
model = Sequential()
# First hidden layer; input_shape=(1,) declares the single input feature.
model.add(Dense(16, activation='relu', input_shape=(1,)))
# Second hidden layer.
model.add(Dense(32, activation='relu'))
# Output layer: softmax over the two classes (no side effects / side effects).
model.add(Dense(2, activation='softmax'))

In [30]:
# Print each layer's output shape and parameter count, plus the totals.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 16)                32        
                                                                 
 dense_2 (Dense)             (None, 32)                544       
                                                                 
 dense_3 (Dense)             (None, 2)                 66        
                                                                 
Total params: 642 (2.51 KB)
Trainable params: 642 (2.51 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [31]:
# Configure training: Adam with a small learning rate, and a sparse categorical
# loss because the labels are plain integers (0/1) rather than one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

In [32]:
# Train for 30 epochs in mini-batches of 10; verbose=2 logs one line per epoch.
model.fit(
    x=scaled_train_samples,
    y=train_labels,
    epochs=30,
    batch_size=10,
    verbose=2,
)

Epoch 1/30


210/210 - 1s - loss: 0.6633 - accuracy: 0.5333 - 750ms/epoch - 4ms/step
Epoch 2/30
210/210 - 0s - loss: 0.6315 - accuracy: 0.6638 - 158ms/epoch - 753us/step
Epoch 3/30
210/210 - 0s - loss: 0.5943 - accuracy: 0.7367 - 150ms/epoch - 715us/step
Epoch 4/30
210/210 - 0s - loss: 0.5572 - accuracy: 0.7829 - 164ms/epoch - 783us/step
Epoch 5/30
210/210 - 0s - loss: 0.5208 - accuracy: 0.8129 - 154ms/epoch - 733us/step
Epoch 6/30
210/210 - 0s - loss: 0.4837 - accuracy: 0.8452 - 155ms/epoch - 740us/step
Epoch 7/30
210/210 - 0s - loss: 0.4507 - accuracy: 0.8619 - 153ms/epoch - 730us/step
Epoch 8/30
210/210 - 0s - loss: 0.4213 - accuracy: 0.8681 - 162ms/epoch - 772us/step
Epoch 9/30
210/210 - 0s - loss: 0.3950 - accuracy: 0.8857 - 160ms/epoch - 761us/step
Epoch 10/30
210/210 - 0s - loss: 0.3723 - accuracy: 0.8924 - 155ms/epoch - 739us/step
Epoch 11/30
210/210 - 0s - loss: 0.3527 - accuracy: 0.9043 - 155ms/epoch - 740us/step
Epoch 12/30
210/210 - 0s - loss: 0.3364 - accuracy: 0.9129 - 16

<keras.src.callbacks.History at 0x13918d22510>