# Emerging Technologies Project Notebook

***

### Goals:

- Read in our data
- Split the data into two sets
- Train the data
- Make predictions based on speed and power
- Expose our data model to a webservice (saving the model localy to be accessed by our script) 

***

In [19]:
import pandas as pd
import numpy as np
import os

# Make numpy values easier to read.
np.set_printoptions(precision=3, suppress=True)

import tensorflow as tf
from tensorflow.keras import layers
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense

In [2]:
# Save our dataset to a variable and print
powerProduction = pd.read_csv(
    "https://raw.githubusercontent.com/ianmcloughlin/2020A-machstat-project/master/dataset/powerproduction.csv")

powerProduction

Unnamed: 0,speed,power
0,0.000,0.000
1,0.125,0.000
2,0.150,0.000
3,0.225,0.000
4,0.275,0.000
5,0.325,4.331
6,0.400,5.186
7,0.450,3.826
8,0.501,1.048
9,0.526,5.553


In [3]:
# form a dataset variable from the values in our powerProduction
dataset = powerProduction.values
dataset

array([[  0.   ,   0.   ],
       [  0.125,   0.   ],
       [  0.15 ,   0.   ],
       [  0.225,   0.   ],
       [  0.275,   0.   ],
       [  0.325,   4.331],
       [  0.4  ,   5.186],
       [  0.45 ,   3.826],
       [  0.501,   1.048],
       [  0.526,   5.553],
       [  0.551,   6.498],
       [  0.576,   5.241],
       [  0.626,   4.22 ],
       [  0.676,   7.974],
       [  0.701,   9.746],
       [  0.726,   0.   ],
       [  0.751,   0.   ],
       [  0.801,   7.271],
       [  0.826,   0.795],
       [  0.851,   7.35 ],
       [  0.926,   1.996],
       [  0.951,   1.609],
       [  0.976,   2.064],
       [  1.001,   6.511],
       [  1.026,   0.   ],
       [  1.051,   2.085],
       [  1.076,   0.   ],
       [  1.101,   0.121],
       [  1.151,   1.208],
       [  1.251,   3.373],
       [  1.276,   6.51 ],
       [  1.301,   0.   ],
       [  1.401,   4.156],
       [  1.426,   2.005],
       [  1.502,   5.352],
       [  1.552,   0.   ],
       [  1.652,   7.221],
 

In [4]:
# split X (speed) and Y (power) table columns
X = dataset[:,0:1]
Y = dataset[:,1]

In [5]:
X # print

array([[ 0.   ],
       [ 0.125],
       [ 0.15 ],
       [ 0.225],
       [ 0.275],
       [ 0.325],
       [ 0.4  ],
       [ 0.45 ],
       [ 0.501],
       [ 0.526],
       [ 0.551],
       [ 0.576],
       [ 0.626],
       [ 0.676],
       [ 0.701],
       [ 0.726],
       [ 0.751],
       [ 0.801],
       [ 0.826],
       [ 0.851],
       [ 0.926],
       [ 0.951],
       [ 0.976],
       [ 1.001],
       [ 1.026],
       [ 1.051],
       [ 1.076],
       [ 1.101],
       [ 1.151],
       [ 1.251],
       [ 1.276],
       [ 1.301],
       [ 1.401],
       [ 1.426],
       [ 1.502],
       [ 1.552],
       [ 1.652],
       [ 1.677],
       [ 1.702],
       [ 1.827],
       [ 1.877],
       [ 1.902],
       [ 2.027],
       [ 2.077],
       [ 2.102],
       [ 2.177],
       [ 2.227],
       [ 2.252],
       [ 2.402],
       [ 2.477],
       [ 2.553],
       [ 2.578],
       [ 2.703],
       [ 2.828],
       [ 2.853],
       [ 2.953],
       [ 3.003],
       [ 3.028],
       [ 3.053

In [6]:
Y # Print

array([  0.   ,   0.   ,   0.   ,   0.   ,   0.   ,   4.331,   5.186,
         3.826,   1.048,   5.553,   6.498,   5.241,   4.22 ,   7.974,
         9.746,   0.   ,   0.   ,   7.271,   0.795,   7.35 ,   1.996,
         1.609,   2.064,   6.511,   0.   ,   2.085,   0.   ,   0.121,
         1.208,   3.373,   6.51 ,   0.   ,   4.156,   2.005,   5.352,
         0.   ,   7.221,   0.   ,   2.439,   0.   ,   2.359,   7.048,
         0.   ,   0.   ,   0.   ,   2.701,   2.869,   0.   ,   2.287,
         6.947,   2.37 ,   3.165,   0.667,   7.559,  14.131,   2.894,
         3.578,   2.633,   2.836,   3.063,   0.   ,   1.36 ,   4.334,
         2.167,   6.819,   0.   ,   5.3  ,   0.   ,   1.535,   9.89 ,
         0.   ,   2.27 ,   4.595,   0.   ,   0.   ,   0.   ,   4.453,
         4.415,   0.132,   2.03 ,   1.035,   4.437,   1.043,   0.   ,
         3.934,   6.824,  13.592,   0.82 ,  10.444,   0.   ,   7.06 ,
         0.946,   1.907,   1.681,   9.519,   2.973,   8.623,   4.525,
         0.932,   2.

In [17]:
#  scale our data down to between 0 and 1 in order to make it easier for it to be read by our trainer
min_max_scaler = preprocessing.MinMaxScaler()
X_scale = min_max_scaler.fit_transform(X)

In [9]:
X_scale # print

array([[0.   ],
       [0.005],
       [0.006],
       [0.009],
       [0.011],
       [0.013],
       [0.016],
       [0.018],
       [0.02 ],
       [0.021],
       [0.022],
       [0.023],
       [0.025],
       [0.027],
       [0.028],
       [0.029],
       [0.03 ],
       [0.032],
       [0.033],
       [0.034],
       [0.037],
       [0.038],
       [0.039],
       [0.04 ],
       [0.041],
       [0.042],
       [0.043],
       [0.044],
       [0.046],
       [0.05 ],
       [0.051],
       [0.052],
       [0.056],
       [0.057],
       [0.06 ],
       [0.062],
       [0.066],
       [0.067],
       [0.068],
       [0.073],
       [0.075],
       [0.076],
       [0.081],
       [0.083],
       [0.084],
       [0.087],
       [0.089],
       [0.09 ],
       [0.096],
       [0.099],
       [0.102],
       [0.103],
       [0.108],
       [0.113],
       [0.114],
       [0.118],
       [0.12 ],
       [0.121],
       [0.122],
       [0.123],
       [0.125],
       [0.128],
       [

***
## Training

Use train_test_split function [1] which will split our dataset into a training set, a validation set and a test set
***

In [18]:
X_train, X_val_and_test, Y_train, Y_val_and_test = train_test_split(X_scale, Y, test_size=0.3)

X_val, X_test, Y_val, Y_test = train_test_split(X_val_and_test, Y_val_and_test, test_size=0.5)

print(X_train.shape, X_val.shape, X_test.shape, Y_train.shape, Y_val.shape, Y_test.shape)

(350, 1) (75, 1) (75, 1) (350,) (75,) (75,)


In [20]:
# Set up our architecture with neurons and their density
model = Sequential([
    Dense(32, activation='relu', input_shape=(1,)),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Set up our algorithm, loss function and other metrics
model.compile(optimizer='sgd',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# train our data by using the fit() function on our X and Y train
hist = model.fit(X_train, Y_train,
          batch_size=32, epochs=100,
          validation_data=(X_val, Y_val))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100


Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [14]:
model.evaluate(X_test, Y_test)[1]



0.09333333373069763

In [15]:
# saving the model localy to be accessed by our script
model.save(os.path.join("model.h5"))

***
### References

[1] train_test_split; https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html

***