In [42]:
%load_ext tensorboard

import tensorflow as tf
import datetime
from tensorflow import keras 
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import time
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold

from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import ModelCheckpoint

# Import des données et prétraitement

In [41]:
# Column labels for the yacht hydrodynamics table; they are supplied
# explicitly to `read_csv` through `names=` below.
column_names = ["Longitudinal position",
                "Prismatic coefficient",
                "Length-displacement ratio",
                "Beam-draught ratio",
                "Length-beam ratio",
                "Froude number",
                "Residuary resistance",
                ]


# The file is whitespace-delimited. `sep=r"\s+"` is the documented replacement
# for `delim_whitespace=True`, which pandas deprecated in version 2.2.
data = pd.read_csv('yacht_hydrodynamics.data', sep=r"\s+", names=column_names)

# Quick look at the first rows.
print(data.head(5),'\n')


# Per-column count of missing values.
print(f"Il y a \n"
      f"{data.isnull().sum()} "
      f"\ndonnées manquantes")


# Summary statistics for every column.
print(data.describe())

   Longitudinal position  Prismatic coefficient  Length-displacement ratio  \
0                   -2.3                  0.568                       4.78   
1                   -2.3                  0.568                       4.78   
2                   -2.3                  0.568                       4.78   
3                   -2.3                  0.568                       4.78   
4                   -2.3                  0.568                       4.78   

   Beam-draught ratio  Length-beam ratio  Froude number  Residuary resistance  
0                3.99               3.17          0.125                  0.11  
1                3.99               3.17          0.150                  0.27  
2                3.99               3.17          0.175                  0.47  
3                3.99               3.17          0.200                  0.78  
4                3.99               3.17          0.225                  1.18   

Il y a 
Longitudinal position        0
Prismatic 

# Features et target

In [None]:
# Separate the input features from the regression target.
x = data.drop(columns=["Residuary resistance"])
y = data["Residuary resistance"]

# Use a fixed seed so the train/test partition (and therefore every metric
# reported further down) is identical across Restart & Run All. The previous
# time-based seed produced a different split on each execution.
SPLIT_SEED = 42
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=SPLIT_SEED)


print(f"La taille de x train est {x_train.shape}")
print(f"La taille de x test est {x_test.shape}")


# Fit the scaler on the training split only, then apply the same transform to
# the test split so no test statistics leak into preprocessing.
scaler = StandardScaler()

x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


# Création du modèle avec subclassing

In [33]:
class YachtModel(tf.keras.Model):
    """Small fully-connected regressor built with Keras subclassing.

    Two hidden Dense layers of 32 ReLU units, each with an L2 kernel
    penalty of 0.001, feed a single linear output unit.
    """

    def __init__(self):
        super().__init__()

        # Hidden layers: 32 units each, ReLU, L2-regularised kernels.
        self.dense1 = keras.layers.Dense(
            units=32,
            activation='relu',
            kernel_regularizer=regularizers.l2(0.001),
        )
        self.dense2 = keras.layers.Dense(
            units=32,
            activation='relu',
            kernel_regularizer=regularizers.l2(0.001),
        )
        # Output layer: one unit, no activation.
        self.output_layer = keras.layers.Dense(units=1, activation=None)

    def call(self, inputs):
        """Run `inputs` through both hidden layers and the output layer."""
        hidden = self.dense2(self.dense1(inputs))
        prediction = self.output_layer(hidden)
        return prediction
    
    
# Instantiate the network, attach the optimiser and loss, then build it with
# the feature count of the training matrix so that `summary()` can report
# parameter counts.
n_features = x_train.shape[1]

model = YachtModel()
model.compile(loss='mean_squared_error', optimizer='adam')
model.build(input_shape=(None, n_features))
model.summary()

Model: "yacht_model_87"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_261 (Dense)           multiple                  224       
                                                                 
 dense_262 (Dense)           multiple                  1056      
                                                                 
 dense_263 (Dense)           multiple                  33        
                                                                 
Total params: 1313 (5.13 KB)
Trainable params: 1313 (5.13 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# Entrainement 

In [32]:
# 5-fold cross-validated grid search over batch size and learning rate.
# For every (fold, batch, learning-rate) combination a fresh YachtModel is
# trained; the weights of the run with the lowest validation loss are kept.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="logs")

kf = KFold(n_splits=5, shuffle=True, random_state=42)
all_scores = []
best_weights = None
best_val_loss = float("inf")


# Wrap in DataFrames so the positional fold indices can be applied with .iloc.
x_train = pd.DataFrame(x_train)
y_train = pd.DataFrame(y_train)


for fold, (train_index, val_index) in enumerate(kf.split(x_train)):
    x_train_fold, x_val_fold = x_train.iloc[train_index], x_train.iloc[val_index]
    y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]

    for batch in [32, 64, 128]:
        for alpha in [0.01, 0.001, 0.0001]:
            print(f"entrainement avec batch de taille {batch} et learning rate de {alpha}")

            # New model per run so no weights carry over between configurations.
            model = YachtModel()
            model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=alpha), loss = 'mean_squared_error')
            # Keep only the weights of the epoch with the lowest val_loss.
            checkpoint_cb = ModelCheckpoint("best_model.h5", monitor="val_loss", save_best_only=True, save_weights_only=True)
            # NOTE(review): every run trains for the full 10000 epochs; the
            # EarlyStopping callback imported at the top of the notebook is
            # not used here. Consider adding it to cut run time.
            model.fit(x_train_fold, y_train_fold,
                      batch_size = batch,
                      epochs=10000,
                      verbose=0,
                      validation_data=(x_val_fold,y_val_fold),
                      callbacks=[checkpoint_cb,tensorboard_callback])

            # Reload the best-epoch weights before scoring this run.
            model.load_weights("best_model.h5")
            val_loss = model.evaluate(x_val_fold, y_val_fold, verbose=0)
            # Record this run's validation loss. The previous code appended an
            # undefined name `loss`, which raises NameError on a fresh kernel.
            all_scores.append(val_loss)

            print(f"Fold {fold + 1} validation loss: {val_loss}")

            # Track the best run seen so far across all folds and settings.
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_weights = model.get_weights()


# Load the overall best weights into the most recently created model.
model.set_weights(best_weights)


average_val_loss = np.mean(all_scores)
print(f"Moyenne des pertes de validation : {average_val_loss}")

entrainement avec batch de taille 32 et learning rate de 0.01
Fold 1 validation loss: 0.20835819840431213
entrainement avec batch de taille 32 et learning rate de 0.001
Fold 1 validation loss: 0.1604050248861313
entrainement avec batch de taille 32 et learning rate de 0.0001
Fold 1 validation loss: 0.8481006026268005
entrainement avec batch de taille 64 et learning rate de 0.01
Fold 1 validation loss: 0.1713026762008667
entrainement avec batch de taille 64 et learning rate de 0.001
Fold 1 validation loss: 0.3474327027797699
entrainement avec batch de taille 64 et learning rate de 0.0001
Fold 1 validation loss: 0.652191698551178
entrainement avec batch de taille 128 et learning rate de 0.01
Fold 1 validation loss: 0.19173076748847961
entrainement avec batch de taille 128 et learning rate de 0.001
Fold 1 validation loss: 0.6689635515213013
entrainement avec batch de taille 128 et learning rate de 0.0001
Fold 1 validation loss: 1.074098825454712
entrainement avec batch de taille 32 et lea

In [40]:
# Load the best weights kept by the cross-validation cell into the current
# model, then score it on the held-out test split.
model.set_weights(best_weights)
# `evaluate` returns the compiled loss (mean squared error in the training cell).
result = model.evaluate(x_test, y_test)
print(f"Erreur quadratique moyenne sur les données de test : {result}")

Erreur quadratique moyenne sur les données de test : 0.38494014739990234
