# House price regression with a dense neural network (Boston Housing)

## Import the libraries

In [28]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Load the data

In [29]:
# Load the Boston Housing dataset shipped with Keras.
# test_split=0.3 holds out 30% of the samples as the test set; seed=1 fixes the
# shuffle done before the split, so the train/test partition is reproducible.
(x_train, y_train), (x_test, y_test) = keras.datasets.boston_housing.load_data(test_split=0.3, seed=1)



# Alternative (disabled): load the data from a local CSV file instead.
#data=pd.read_csv("BostonHousing.csv", header=0)

# Shuffle the rows, then split them 70% / 30% into train and test sets.
#data       = data.sample(frac=1., axis=0)
#data_train = data.sample(frac=0.7, axis=0)
#data_test  = data.drop(data_train.index)

# 'medv' is the house price, i.e. the column used as the regression target.
#x_train = data_train.drop('medv',  axis=1)
#y_train = data_train['medv']
#x_test  = data_test.drop('medv',   axis=1)
#y_test  = data_test['medv']

## Data visualization

In [30]:
# Wrap the loaded arrays in DataFrames so the notebook renders them as tables
# and the pandas summary helpers become available.
x_train, y_train = pd.DataFrame(x_train), pd.DataFrame(y_train)
x_test, y_test = pd.DataFrame(x_test), pd.DataFrame(y_test)

# Report the (rows, columns) shape of the features and targets of each split.
print(
    'x_train : ', x_train.shape,
    'y_train : ', y_train.shape,
)
print(
    'x_test  : ', x_test.shape,
    'y_test  : ', y_test.shape,
)

# Last expression of the cell: preview the first ten training rows.
x_train.head(10)

x_train :  (354, 13) y_train :  (354, 1)
x_test  :  (152, 13) y_test  :  (152, 1)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.04932,33.0,2.18,0.0,0.472,6.849,70.3,3.1827,7.0,222.0,18.4,396.9,7.53
1,0.02543,55.0,3.78,0.0,0.484,6.696,56.4,5.7321,5.0,370.0,17.6,396.9,7.18
2,0.22927,0.0,6.91,0.0,0.448,6.03,85.5,5.6894,3.0,233.0,17.9,392.74,18.8
3,0.05789,12.5,6.07,0.0,0.409,5.878,21.4,6.498,4.0,345.0,18.9,396.21,8.1
4,3.67822,0.0,18.1,0.0,0.77,5.362,96.2,2.1036,24.0,666.0,20.2,380.79,10.19
5,0.59005,0.0,21.89,0.0,0.624,6.372,97.9,2.3274,4.0,437.0,21.2,385.76,11.12
6,0.03615,80.0,4.95,0.0,0.411,6.63,23.4,5.1167,4.0,245.0,19.2,396.9,4.7
7,1.35472,0.0,8.14,0.0,0.538,6.072,100.0,4.175,4.0,307.0,21.0,376.73,13.04
8,0.11069,0.0,13.89,1.0,0.55,5.951,93.8,2.8893,5.0,276.0,16.4,396.9,17.92
9,0.04684,0.0,3.41,0.0,0.489,6.417,66.1,3.0923,2.0,270.0,17.8,392.18,8.81


In [31]:
# Per-feature summary statistics of the training set (count, mean, std, min,
# quartiles, max); useful to see how different the feature scales are.
x_train.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
count,354.0,354.0,354.0,354.0,354.0,354.0,354.0,354.0,354.0,354.0,354.0,354.0,354.0
mean,3.679883,10.983051,11.129181,0.064972,0.552345,6.313678,68.350282,3.732462,9.737288,412.254237,18.542938,352.176808,12.468757
std,8.691423,22.966498,6.819598,0.246825,0.11009,0.68715,27.945635,2.019922,8.834418,169.211227,2.086872,98.33074,7.056974
min,0.00632,0.0,0.74,0.0,0.385,3.561,2.9,1.1296,1.0,188.0,12.6,0.32,1.73
25%,0.082102,0.0,5.145,0.0,0.458,5.907,42.95,2.11105,4.0,284.0,17.4,372.555,6.7775
50%,0.253715,0.0,9.69,0.0,0.538,6.241,76.7,3.1423,5.0,335.0,19.1,391.28,10.685
75%,3.69599,9.375,18.1,0.0,0.6215,6.62975,93.875,5.1004,24.0,666.0,20.2,396.27,16.955
max,88.9762,95.0,27.74,1.0,0.871,8.78,100.0,10.7103,24.0,711.0,22.0,396.9,37.97


## Data normalization

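The features span very different scales (see the summary above), so each one is standardized to zero mean and unit variance, which makes training faster and more stable. The mean and standard deviation are computed on the training set only and then reused for the test set.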

In [32]:
# Column-wise statistics of the training features; they are the only statistics
# used below, for the training set and the test set alike.
mean, std = x_train.mean(), x_train.std()

# Standardize both splits with the training-set mean and standard deviation.
x_train, x_test = ((frame - mean) / std for frame in (x_train, x_test))

# From here on the model is fed plain NumPy arrays instead of DataFrames.
x_train, y_train, x_test, y_test = map(np.array, (x_train, y_train, x_test, y_test))

## Build the model

In [33]:
# Number of input features: second axis of the (samples, features) training array.
shape = x_train.shape[1]

# Fully connected regression network: three ReLU hidden layers followed by a
# single output unit with no activation, which emits the predicted price.
model = keras.models.Sequential()
# keras.layers.Input documents `shape` as a tuple that excludes the batch axis.
# A bare int is only tolerated by some Keras releases, so pass a 1-tuple.
model.add(keras.layers.Input(shape=(shape,), name="InputLayer"))
model.add(keras.layers.Dense(32, activation='relu', name='Dense_n1'))
model.add(keras.layers.Dense(64, activation='relu', name='Dense_n2'))
model.add(keras.layers.Dense(32, activation='relu', name='Dense_n3'))
model.add(keras.layers.Dense(1, name='Output'))

# Optimize the mean squared error with Adam; MAE and MSE are tracked as metrics,
# so MSE appears twice in the training logs (once as the loss, once as a metric).
model.compile(optimizer = 'adam',
              loss      = 'mse',
              metrics   = ['mae', 'mse'] )

# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Dense_n1 (Dense)            (None, 32)                448       
                                                                 
 Dense_n2 (Dense)            (None, 64)                2112      
                                                                 
 Dense_n3 (Dense)            (None, 32)                2080      
                                                                 
 Output (Dense)              (None, 1)                 33        
                                                                 
Total params: 4,673
Trainable params: 4,673
Non-trainable params: 0
_________________________________________________________________


## Train the model

In [34]:
# Train for 60 epochs with mini-batches of 10 samples and per-epoch logging.
# NOTE(review): the test split doubles as validation data here, so the val_*
# metrics recorded in `history` are measured on the same data used for the
# final evaluation.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=10,
    epochs=60,
    verbose=1,
    validation_data=(x_test, y_test),
)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


## Evaluate the model

In [35]:
# Evaluate silently on the test split. `score` is indexed in compile order:
# [0] loss, [1] mae, [2] mse.
score = model.evaluate(x_test, y_test, verbose=0)

# Emit the three report lines in one call; the printed text is unchanged.
print('\n'.join((
    'x_test / loss      : {:5.4f}'.format(score[0]),
    'x_test / mae       : {:5.4f}'.format(score[1]),
    'x_test / mse       : {:5.4f}'.format(score[2]),
)))

x_test / loss      : 12.1380
x_test / mae       : 2.1313
x_test / mse       : 12.1380


In [36]:
# Tabulate what fit() recorded: one row per epoch, one column per tracked
# metric (training and validation).
df = pd.DataFrame(history.history)
display(df)

# Lowest validation MAE reached over all epochs.
print(
    "min( val_mae ) : {:.4f}".format(
        min(history.history["val_mae"])
    )
)

Unnamed: 0,loss,mae,mse,val_loss,val_mae,val_mse
0,504.923065,20.651958,504.923065,383.03244,17.463428,383.03244
1,187.822968,11.17486,187.822968,61.297981,5.487352,61.297981
2,38.006332,4.617906,38.006332,42.801208,4.265944,42.801208
3,26.701687,3.757197,26.701687,34.843903,3.863535,34.843903
4,22.27055,3.427988,22.27055,30.308273,3.553699,30.308273
5,19.674833,3.236366,19.674833,27.087521,3.316792,27.087521
6,17.612541,3.039706,17.612541,25.624987,3.115376,25.624985
7,16.901548,2.946813,16.901548,23.895552,3.014055,23.895552
8,15.930024,2.894168,15.930024,22.814276,2.982256,22.814276
9,14.703799,2.759403,14.703799,21.438057,3.006603,21.438059


min( val_mae ) : 2.1206


## Make a prediction

In [37]:
 my_data = [ 1.26425925, -0.48522739,  1.0436489 , -0.23112788,  1.37120745,
       -2.14308942,  1.13489104, -1.06802005,  1.71189006,  1.57042287,
        0.77859951,  0.14769795,  2.7585581 ]
real_price = 10.4

my_data=np.array(my_data).reshape(1,13)

In [38]:
# Run the single-row batch through the network; element [0, 0] is the one
# predicted value for that row.
predictions = model.predict(my_data)

# Show the prediction next to the known price, one per line.
print(
    "Prediction : {:.2f} K$".format(predictions[0, 0]),
    "Reality    : {:.2f} K$".format(real_price),
    sep="\n",
)

Prediction : 9.30 K$
Reality    : 10.40 K$
