### Preprocessing Data

In [3]:
import numpy as np
from random import randint
from sklearn.preprocessing import MinMaxScaler

In [4]:
# Empty containers for the synthetic data set: one label per sample.
train_labels, train_samples = [], []

#### Generating some fake data:
- An experimental drug was tested on individuals from ages 13 to 100.
- The trial had 2100 participants. Half were under 65 years old, half were 65 or older.
- 95% of patients 65 or older experienced side effects.
- 95% of patients under 65 experienced no side effects.

In [7]:
# Preprocess data for training with Keras.
# Builds the synthetic trial data: ages go into train_samples and the matching
# labels into train_labels (1 = experienced side effects, 0 = did not).

# Start from empty lists inside this cell so that re-running it does not
# append another 2100 rows on top of the previous run.
train_labels = []
train_samples = []

N_OUTLIERS = 50    # per age group: the ~5% that go against the trend
N_TYPICAL = 1000   # per age group: the ~95% that follow the trend

# Each entry is (number of younger/older pairs, label for younger, label for older).
#   outliers: younger WITH side effects (1), older WITHOUT side effects (0)
#   typical:  younger WITHOUT side effects (0), older WITH side effects (1)
for n_pairs, younger_label, older_label in (
    (N_OUTLIERS, 1, 0),
    (N_TYPICAL, 0, 1),
):
    for _ in range(n_pairs):
        # Younger individual (under 65); randint bounds are inclusive.
        train_samples.append(randint(13, 64))
        train_labels.append(younger_label)

        # Older individual (65 or older).
        train_samples.append(randint(65, 100))
        train_labels.append(older_label)

In [16]:
# Peek at the raw ages: the slice [:9] yields the first 9 samples.
for age in train_samples[:9]:
    print(age)

64
79
30
73
58
82
35
77
45


In [17]:
# Peek at the first 9 labels (1 = side effects, 0 = no side effects).
for label in train_labels[:9]:
    print(label)

1
0
1
0
1
0
1
0
1


In [20]:
# Convert both lists to NumPy arrays (Keras expects to work with NumPy arrays).
train_samples = np.array(train_samples)
train_labels = np.array(train_labels)

In [21]:
# Scale the data.
# scikit-learn's MinMaxScaler maps the input ages (drawn from 13 to 100)
# onto the range [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
# reshape(-1, 1) turns the 1-D array of ages into a single column before fitting.
samples_column = train_samples.reshape(-1, 1)
scaled_train_samples = scaler.fit_transform(samples_column)

In [22]:
# Show the first 9 scaled samples; each row prints as a one-element array.
for row in scaled_train_samples[:9]:
    print(row)

[0.5862069]
[0.75862069]
[0.1954023]
[0.68965517]
[0.51724138]
[0.79310345]
[0.25287356]
[0.73563218]
[0.36781609]
