In [1]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [None]:
!pip install keras_visualizer

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
import tensorflow as tf
import os

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import plot_model
from keras_visualizer import visualizer 

In [4]:
print(tf.__version__)

2.5.0


In [6]:
df_dataset = pd.read_csv("https://raw.githubusercontent.com/AGuyNextDoor/AProfNextDoor/main/Exercises/Hands%20on%20Machine%20Learning/house-prices-data/dataset.csv")

## Separating Data

In [7]:
train_dataset = df_dataset.sample(frac=0.8, random_state=0)
test_dataset = df_dataset.drop(train_dataset.index)

dataset_X = df_dataset.drop(["Id","SalePrice"], axis = 1)
dataset_id = df_dataset["Id"]
dataset_y = df_dataset["SalePrice"]

train_dataset_X = train_dataset.drop(["Id","SalePrice"], axis = 1)
train_dataset_id = train_dataset["Id"]
train_dataset_y = train_dataset["SalePrice"]

test_dataset_X = test_dataset.drop(["Id","SalePrice"], axis = 1)
test_dataset_id = test_dataset["Id"]
test_dataset_y = test_dataset["SalePrice"]

# or use the train_test splitter from scikit learn

# Write the example here with sklearn

In [8]:
print("Training dataset has", len(train_dataset_X), "values.")
print("Testing dataset has", len(test_dataset_X), "values.")

Training dataset has 1168 values.
Testing dataset has 292 values.


## Normalization

La normalisation est une transformation de la donnée qui permet de "normaliser" la moyenne et la deviation standard d'une distribution. Pour cela on déplace chaque valeur par l'écart et divisant par la déviation standard. Cela permet de ne pas avoir de valeurs trop de difference entre les differentes features de notre dataset et permet aussi de rendre notre convergence plus stable pour nos modèles. 

In [9]:
train_dataset.describe().transpose()[['mean', 'std']]

Unnamed: 0,mean,std
Id,730.949486,421.221360
MSSubClass,57.572774,42.053072
MSZoning,3.032534,0.631075
LotFrontage,200.055651,3121.409754
LotArea,10549.443493,9906.540386
...,...,...
MoSold,6.366438,2.701544
YrSold,2007.819349,1.318539
SaleType,7.482021,1.585502
SaleCondition,3.761130,1.134621


### The 'old' way : Pandas

In [10]:
def normalize(x, mean, std): 
    return ((x-mean)/std)

In [11]:
def normalizeColumn(df, colName):
    meanVal = df[colName].mean()
    stdVal = df[colName].std()
    print(meanVal, stdVal)

    newCol = df[colName].apply(lambda x: normalize(x,meanVal, stdVal))
    newMean = newCol.mean()
    newStd = newCol.std()
    print(newMean, newStd)
    
    return newCol

In [12]:
normalizeColumn(dataset_X, "MSZoning")

3.0287671232876714 0.6320174410566584
-2.822704018773001e-16 0.9999999999999998


0      -0.045516
1      -0.045516
2      -0.045516
3      -0.045516
4      -0.045516
          ...   
1455   -0.045516
1456   -0.045516
1457   -0.045516
1458   -0.045516
1459   -0.045516
Name: MSZoning, Length: 1460, dtype: float64

We can transform the complete dataset with a simple line

In [13]:
dataset_X-dataset_X.mean()/dataset_X.std()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,...,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,58.654929,-1.792221,64.943808,8448.946343,-14.561727,1.62154,-0.92473,-0.026171,2.139336,-0.225639,...,60.295758,-0.359202,-0.116299,-0.270116,-0.068668,-0.087658,-0.338311,496.198833,3.159453,0.57489
1,18.654929,-1.792221,79.943808,9598.946343,-14.561727,1.62154,-0.92473,-0.026171,0.139336,-0.225639,...,-0.704242,-0.359202,-0.116299,-0.270116,-0.068668,-0.087658,2.661689,495.198833,3.159453,0.57489
2,58.654929,-1.792221,67.943808,11248.946343,-14.561727,-1.37846,-0.92473,-0.026171,2.139336,-0.225639,...,41.295758,-0.359202,-0.116299,-0.270116,-0.068668,-0.087658,6.661689,496.198833,3.159453,0.57489
3,68.654929,-1.792221,59.943808,9548.946343,-14.561727,-1.37846,-0.92473,-0.026171,-1.860664,-0.225639,...,34.295758,271.640798,-0.116299,-0.270116,-0.068668,-0.087658,-0.338311,494.198833,3.159453,-3.42511
4,58.654929,-1.792221,83.943808,14258.946343,-14.561727,-1.37846,-0.92473,-0.026171,0.139336,-0.225639,...,83.295758,-0.359202,-0.116299,-0.270116,-0.068668,-0.087658,9.661689,496.198833,3.159453,0.57489
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,58.654929,-1.792221,61.943808,7915.946343,-14.561727,1.62154,-0.92473,-0.026171,2.139336,-0.225639,...,39.295758,-0.359202,-0.116299,-0.270116,-0.068668,-0.087658,5.661689,495.198833,3.159453,0.57489
1456,18.654929,-1.792221,84.943808,13173.946343,-14.561727,1.62154,-0.92473,-0.026171,2.139336,-0.225639,...,-0.704242,-0.359202,-0.116299,-0.270116,-0.068668,-0.087658,-0.338311,498.198833,3.159453,0.57489
1457,68.654929,-1.792221,65.943808,9040.946343,-14.561727,1.62154,-0.92473,-0.026171,2.139336,-0.225639,...,59.295758,-0.359202,-0.116299,-0.270116,-0.068668,2499.912342,2.661689,498.198833,3.159453,0.57489
1458,18.654929,-1.792221,67.943808,9715.946343,-14.561727,1.62154,-0.92473,-0.026171,2.139336,-0.225639,...,-0.704242,111.640798,-0.116299,-0.270116,-0.068668,-0.087658,1.661689,498.198833,3.159453,0.57489


### Normalization with Sklearn

### Normalization with TensorFlow 

Normalization can be added directy in our models so that we don't have to worry about it anymore !

### A Visualisation tool

In [2]:
def plot_loss(history):
  plt.plot(history.history['loss'], label='loss')
  plt.plot(history.history['val_loss'], label='val_loss')
  plt.xlabel('Epoch')
  plt.ylabel('Error J')
  plt.legend()
  plt.grid(True)

## Linear Regression

### A shallow network

`keras` used to live as a separate entity from `tensorflow`. It is an interface built on top of `theano` and `tensorflow` to simplify some of their non-beginner-friendly methods. It was integrated inside `tensorflow` in 2019. 

We can use the `tf.keras.Sequential` from the `tf.keras` API to build our first shallow model.

## Deep Neural Networks

Create a multi layered perceptron with `keras.Sequential`.
Your model needs at least three hidden layers!

## TF utilities

### Saving models

#### Saving checkpoints during training

#### Loading Weigths

### Keras Tuner

In [24]:
!pip install -q -U keras-tuner
!pip install tensorboard

In [25]:
import keras_tuner as kt
from tensorboard.plugins.hparams import api as hpapi