<a href="https://colab.research.google.com/github/JulesKouatchou/PythonMaterials/blob/master/regression_tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Python 2/3 compatibility shims; a __future__ import has to be the first
# statement of the cell.
from __future__ import absolute_import, division, print_function
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

![NASA](http://www.nasa.gov/sites/all/themes/custom/nasatwo/images/nasa-logo.svg)


<center><h1><font size="+3" color="red">ASTG Python Camp (APyC)</font></h1></center>
---------

<center>
<h1>Advanced Users</h1>
</center>

# Machine Learning Application With TensorFlow

### Load the modules

In [0]:
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

print(tf.__version__)

1.13.1


### Main Steps of a ML Program

<OL>
<LI> Define a question
<LI> Collect data
<LI> Visualize data (if possible)
<LI> Train algorithm
<LI> Test the Algorithm
<LI> Collect feedback
<LI> Refine the algorithm
<LI> Repeat steps 4-7 until the results are satisfactory
<LI> Use the model to make a prediction
</OL>

## Problem Statement

We consider the function: <br>
$$
f(x,y) = (1-(x^2 + y^3))e^{-\frac{1}{2}(x^2 + y^2)}
$$
<br>
defined in the domain $D=[-3,3] \times [-3,3]$.
<OL>
<LI> We randomly select $n$ points in the domain $D$ and evaluate the function at those points to create a dataset of (point, value) pairs.
<LI> We use the dataset for training a ML algorithm.
<LI> We generate a uniform set of points in $D$ to test the algorithm.
</OL>

## Generating the data

#### Define the function

In [0]:
def ff(x,y):
    """Evaluate f(x, y) = (1 - (x^2 + y^3)) * exp(-(x^2 + y^2) / 2).

    Works element-wise, so ``x`` and ``y`` may be scalars or NumPy arrays
    that broadcast against each other.
    """
    x_sq = x**2
    polynomial = 1 - (x_sq + y**3)
    gaussian = np.exp(-(x_sq + y**2) / 2)
    return polynomial * gaussian

#### Create the data

In [0]:
numDims = 2
nx = 30
ny = 30
numPoints = nx * ny

# Seed the global NumPy generator so that "Restart & Run All" regenerates
# exactly the same training set.
SEED = 42
np.random.seed(SEED)


def make_grid_dataset(xs, ys):
    """Evaluate ``ff`` on the tensor-product grid spanned by ``xs`` and ``ys``.

    Parameters
    ----------
    xs, ys : 1-D arrays of x and y coordinates.

    Returns
    -------
    points : array of shape (len(xs) * len(ys), 2); row ``i * len(ys) + j``
        holds ``(xs[i], ys[j])`` (x varies slowest).
    values : array of shape (len(xs) * len(ys),) with ``ff`` at each row.
    """
    # indexing='ij' keeps x as the outer (slow) index and y as the inner one.
    grid_x, grid_y = np.meshgrid(xs, ys, indexing='ij')
    points = np.column_stack((grid_x.ravel(), grid_y.ravel()))
    values = ff(grid_x, grid_y).ravel()
    return points, values


# Dataset for training: every pairing of nx random x coordinates with
# ny random y coordinates drawn uniformly from [-3, 3].
x = np.random.uniform(-3.0, 3.0, nx)
y = np.random.uniform(-3.0, 3.0, ny)
xt, yt = make_grid_dataset(x, y)  # grid points / targets for training

# Dataset for validation: a uniform nx-by-ny grid over the same domain.
x = np.linspace(-3.0, 3.0, nx)
y = np.linspace(-3.0, 3.0, ny)
xv, yv = make_grid_dataset(x, y)  # grid points / targets for validation

assert xt.shape == xv.shape == (numPoints, numDims)
assert yt.shape == yv.shape == (numPoints,)

#### Save dataset in csv file

In [0]:
import csv


def write_dataset_csv(path, points, targets):
    """Write a dataset to ``path`` as CSV.

    The header is ``x0, x1, ..., TargetValues``; each following row holds
    the coordinates of one point followed by its target value.

    Parameters
    ----------
    path : name of the CSV file to create (overwritten if it exists).
    points : 2-D array, one row per sample and one column per dimension.
    targets : 1-D array with one target value per row of ``points``.
    """
    header = ['x'+str(i) for i in range(points.shape[1])] + ['TargetValues']
    # newline='' lets the csv module control line endings itself; without it
    # an extra blank line follows every row on platforms that translate '\n'.
    with open(path, 'w', newline='') as fid:
        csv_fid = csv.writer(fid, delimiter=',')
        csv_fid.writerow(header)
        for point, target in zip(points, targets):
            csv_fid.writerow(list(point) + [target])


trainInputsFile = 'train_dataFile.csv'  # csv file for x values and y values
write_dataset_csv(trainInputsFile, xt, yt)

validInputsFile = 'valid_dataFile.csv'  # csv file for values to be predicted
write_dataset_csv(validInputsFile, xv, yv)

## Data Gathering and Basic Analyses

#### Get the data to be used for training

In [0]:
# Read the training table back from disk and preview its first five rows.
train_data = pd.read_csv('train_dataFile.csv')
train_preview = train_data.head(5)
print(train_preview)

         x0        x1  TargetValues
0  2.422747 -1.296915     -0.061610
1  2.422747  1.587623     -0.133682
2  2.422747 -2.647292      0.021867
3  2.422747  0.787108     -0.208847
4  2.422747 -2.942616      0.014428


#### Get the data to be used for validation

In [0]:
# Read the validation table back from disk and preview its first five rows.
valid_data = pd.read_csv('valid_dataFile.csv')
valid_preview = valid_data.head(5)
print(valid_preview)

#### Plot the data to be trained

In [0]:
# Importing Axes3D registers the '3d' projection on older Matplotlib releases.
from mpl_toolkits.mplot3d import Axes3D

# Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and removed in
# 3.6; add_subplot(..., projection='3d') works on old and new releases alike.
fig = plt.figure()
threedee = fig.add_subplot(111, projection='3d')
threedee.scatter(train_data['x0'], train_data['x1'], train_data['TargetValues'])
threedee.set_xlabel('x')
threedee.set_ylabel('y')
threedee.set_zlabel('f(x,y)')
threedee.set_title('Training data')
plt.show()

#### Display the joint distribution of the columns from the training set

In [0]:
# Pairwise plots of the input features only; the target column is dropped first.
# NOTE(review): this cell expects 'TargetValues' to still be in train_data, so it
# has to run before the "Split features from labels" cell pops that column.
sns.pairplot(train_data.drop(columns=["TargetValues"]))

#### Check the overall statistics

In [0]:
# Summary statistics for every column of the training table.
train_stats = train_data.describe()
# Drop the target column: only the feature statistics are used for normalization.
train_stats.pop("TargetValues")
# Transpose so that each feature is a row and each statistic (e.g. 'mean',
# 'std') is a column, which is how norm() looks them up.
train_stats = train_stats.transpose()
print(train_stats)

#### Split features from labels

In [0]:
# Separate the target value, or "label", from the features.
# This label is the value that you will train the model to predict.
# pop() removes the column in place, so from here on train_data and valid_data
# hold the features only.
# NOTE(review): because of the in-place removal this cell is not re-runnable;
# a second execution would no longer find 'TargetValues'.
train_labels = train_data.pop('TargetValues')
valid_labels = valid_data.pop('TargetValues')

## Normalize the Data

In [0]:
# Features that live on different scales and ranges should be normalized.
# The model might still converge without it, but training gets harder and
# the resulting model becomes tied to the units chosen for the inputs.

def norm(x):
    """Standardize ``x`` column-wise with the training-set statistics.

    Subtracts ``train_stats['mean']`` and divides by ``train_stats['std']``.
    """
    centered = x - train_stats['mean']
    return centered / train_stats['std']

# Both sets are scaled with the training statistics; the normalized training
# data is what the model is fitted on.
normed_train_data = norm(train_data)
normed_valid_data = norm(valid_data)

## Build the Model

#### Instantiate a sequential model using Keras

In [0]:
# Two hidden ReLU layers of 64 units each, followed by one linear output unit.
# The input width equals the number of feature columns in train_data.
n_features = len(train_data.keys())

model = keras.Sequential()
model.add(layers.Dense(64, activation=tf.nn.relu, input_shape=[n_features]))
model.add(layers.Dense(64, activation=tf.nn.relu))
model.add(layers.Dense(1))

#### Define the optimizer

In [0]:
# RMSprop optimizer; 0.001 is passed as its first positional argument
# (the learning rate).
optimizer = tf.keras.optimizers.RMSprop(0.001)

#### Compile the model

In [0]:
# Keras requires a loss function and an optimizer before training can start.
# MSE is the quantity being minimized; MAE and MSE are also tracked as metrics.
compile_options = dict(
    loss='mse',
    optimizer=optimizer,
    metrics=['mae', 'mse'],
)
model.compile(**compile_options)

#### Inspect the model

In [0]:
# Print the layer-by-layer description of the network.
model.summary()

#### Try the model

In [0]:
# Smoke test: feed the first 10 normalized training rows through the
# still-untrained network and print the raw predictions.
example_batch = normed_train_data.iloc[:10]
example_result = model.predict(example_batch)
print(example_result)

## Train the Model

In [0]:
# Train the model for 1000 epochs, and record the training and
# validation metrics in the history object

class PrintDot(keras.callbacks.Callback):
  """Progress indicator: one dot per completed epoch, a new line every 100 epochs."""

  def on_epoch_end(self, epoch, logs=None):
    if epoch % 100 == 0: print('')
    print('.', end='')

# How many times we go through the entire dataset
EPOCHS = 1000

# The uniform validation grid is passed explicitly so that the history also
# contains the val_* entries that plot_history() draws. Validation data is
# only evaluated at the end of each epoch; it is not used to update weights.
history = model.fit(normed_train_data, train_labels,
                    epochs=EPOCHS,
                    validation_data=(normed_valid_data, valid_labels),
                    verbose=0, callbacks=[PrintDot()])

#### Visualize the model's training progress

In [0]:
# Turn the per-epoch metrics recorded by fit() into a table, append the epoch
# number as a column, and show the last few epochs.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.tail()

In [0]:
def plot_history(history):
    """Plot the per-epoch MAE and MSE curves stored in a Keras ``History``.

    One figure is drawn per metric. The training curve is always shown; the
    validation curve is added only when the corresponding ``val_*`` entry
    exists, i.e. when ``fit`` was given validation data.

    Parameters
    ----------
    history : object with a ``history`` dict (metric name -> per-epoch
        values) and an ``epoch`` list, as returned by ``model.fit``.
    """
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    def first_present(names):
        """Return the first entry of ``names`` that is a column of hist, else None."""
        for name in names:
            if name in hist.columns:
                return name
        return None

    # Keras versions differ in how they name the metrics: older releases use
    # the long names, newer ones keep the short strings given to compile().
    panels = [
        ('Mean Abs Error [Target]', ['mean_absolute_error', 'mae']),
        ('Mean Square Error [$Target^2$]', ['mean_squared_error', 'mse']),
    ]

    for ylabel, names in panels:
        train_key = first_present(names)
        if train_key is None:
            continue  # metric was not recorded; nothing to draw
        val_key = first_present(['val_' + name for name in names])

        plt.figure()
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)
        plt.plot(hist['epoch'], hist[train_key], label='Train Error')
        if val_key is not None:
            plt.plot(hist['epoch'], hist[val_key], label='Val Error')
        plt.legend()
        # Anchor the axis at zero but let the upper limit follow the data;
        # a fixed upper bound would flatten curves whose errors are small.
        plt.ylim(bottom=0)

plot_history(history)

In [0]:
# Score the trained model on the validation grid. evaluate() returns the loss
# followed by the metrics requested in compile(), here MAE and MSE.
scores = model.evaluate(normed_valid_data, valid_labels, verbose=0)
loss, mae, mse = scores
print("Testing set Mean Abs Error: {:5.2f} ".format(mae))

Testing set Mean Abs Error:  0.02 


## Make Prediction

In [0]:
# Predict on the normalized validation features and flatten the result to 1-D
# so it can be compared element-wise with valid_labels.
valid_predictions = model.predict(normed_valid_data).flatten()

#### Do the 45-degree plot

In [0]:
# Predictions against true values: a perfect model puts every point on the
# 45-degree line.
plt.scatter(valid_labels, valid_predictions)
plt.xlabel('True Values')
plt.ylabel('Predictions')

# The target function takes negative values, so the axes must not be clipped
# at 0. Use one common range, padded by 5%, that covers both the true values
# and the predictions.
lo = min(valid_labels.min(), valid_predictions.min())
hi = max(valid_labels.max(), valid_predictions.max())
pad = 0.05 * (hi - lo)
lims = [lo - pad, hi + pad]

plt.xlim(lims)
plt.ylim(lims)
# Equal scaling on both axes keeps the reference diagonal at 45 degrees.
plt.gca().set_aspect('equal', adjustable='box')
_ = plt.plot(lims, lims)

#### Error distribution

In [0]:
# Signed prediction error (prediction minus true value) for every validation point.
error = valid_predictions - valid_labels
# Histogram of the errors using 25 bins.
plt.hist(error, bins = 25)
plt.xlabel("Prediction Error")
# Assigning to `_` keeps the returned label object out of the cell output.
_ = plt.ylabel("Count")

In [0]:
# Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and removed in
# 3.6; add_subplot(..., projection='3d') works on old and new releases alike.
fig = plt.figure()
threedee = fig.add_subplot(111, projection='3d')
# Predicted surface over the validation grid.
threedee.scatter(valid_data['x0'], valid_data['x1'], valid_predictions)
threedee.set_xlabel('x')
threedee.set_ylabel('y')
threedee.set_zlabel('f(x,y)')
threedee.set_title('Model predictions on the validation grid')
plt.show()