In [1]:
import os
# Step one directory up from the current working directory.
# NOTE: the path is relative, so re-running this cell moves up one more
# level each time; restart the kernel before running it again.
os.chdir('../')
# Display the resulting working directory as the cell output.
%pwd

'/home/paladin/Downloads/Bixi-OD-Matrix-Prediction/Bixi-OD-Matrix-Prediction'

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingTimeseriesConfig:
    """Immutable settings consumed by the time-series training step.

    Instances are built by ``configurationManeger.get_timeseries_training_config``
    from the YAML config and params files. Two models are described side by
    side: one for "od" data and one for "tensor" data.
    """
    # Directory of this training stage; it is created when the config is built.
    root_dir: Path
    # Destination the fitted OD model is saved to.
    trained_od_model_path: Path
    # Saved OD model that is loaded as the starting point for fitting.
    base_od_model_path: Path
    # Destination the fitted tensor model is saved to.
    trained_tensor_model_path: Path
    # Saved tensor model that is loaded as the starting point for fitting.
    base_tensor_model_path: Path
    # Pickle file holding the OD training data.
    training_od_data: Path
    # Pickle file holding the tensor training data.
    training_tensor_data: Path
    # Matrix size for OD samples; elements [0] and [1] are used when reshaping.
    params_od_size: list
    # Matrix size for tensor samples; elements [0] and [1] are used when reshaping.
    params_tensor_size: list
    # Passed to model.fit as `epochs`.
    params_epochs: int
    # Passed to model.fit as `batch_size`.
    params_batch_size: int
    # NOTE(review): not read by the training code visible in this notebook --
    # presumably applied when the base model is compiled; confirm.
    learning_rate: float
    # Passed to model.fit as `validation_split`.
    validation_ratio: float
    # Window length handed to convert_to_supervised and used as the time axis
    # size when reshaping the samples.
    params_time_lag: int

@dataclass(frozen=True)
class PrepareCallbacksConfig:
    """Immutable settings handed to ``PrepareCallbacks``.

    Instances are built by ``configurationManeger.get_prepare_callbacks_config``.
    """
    # Root directory of the callbacks stage, copied from the YAML config.
    root_dir: Path
    # TensorBoard log directory; it is created when the config is built.
    tensorboard_root_log_dir: Path
    # Checkpoint file path; its parent directory is created when the config is
    # built. The spelling "ckeckpoint" mirrors the key read from the YAML
    # config, so it must stay in sync with that file.
    ckeckpoint_model_filepath: Path
    # Taken from params.PATIENCE.
    # NOTE(review): presumably the early-stopping patience -- confirm in PrepareCallbacks.
    patience: int

In [3]:
from timeseriesPredictor.utils import read_yaml, create_directories
from timeseriesPredictor.constants import *

In [4]:
class configurationManeger:
    """Reads the project's YAML files and assembles per-stage config objects.

    The config, secret and params files are loaded once at construction time
    and kept on the instance as ``self.config``, ``self.secret`` and
    ``self.params``. The artifacts root directory named in the config file is
    created as part of construction.
    """

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 secret_filepath = SECRET_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH):
        """Load the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: YAML file with paths and per-stage sections.
            secret_filepath: YAML file with secrets (loaded, not used here).
            params_filepath: YAML file with hyper-parameters.
        """
        yaml_sources = {
            "config": config_filepath,
            "secret": secret_filepath,
            "params": params_filepath,
        }
        # Loaded in the order config -> secret -> params.
        for attribute, yaml_path in yaml_sources.items():
            setattr(self, attribute, read_yaml(yaml_path))

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_prepare_callbacks_config(self) -> PrepareCallbacksConfig:
        """Build the callbacks config and create the directories it refers to.

        Returns:
            A PrepareCallbacksConfig filled from the ``prepare_callbacks``
            section of the config file plus ``PATIENCE`` from the params file.
        """
        callbacks_section = self.config.prepare_callbacks

        checkpoint_file = callbacks_section.ckeckpoint_model_filepath
        # Only the folder holding the checkpoint file needs to exist up front.
        checkpoint_dir = os.path.dirname(checkpoint_file)
        tensorboard_dir = callbacks_section.tensorboard_root_log_dir

        create_directories([tensorboard_dir, checkpoint_dir])

        return PrepareCallbacksConfig(
            root_dir=callbacks_section.root_dir,
            tensorboard_root_log_dir=tensorboard_dir,
            ckeckpoint_model_filepath=checkpoint_file,
            patience=self.params.PATIENCE,
        )

    def get_timeseries_training_config(self) -> TrainingTimeseriesConfig:
        """Build the training config and create the training root directory.

        Returns:
            A TrainingTimeseriesConfig combining paths from several sections
            of the config file with hyper-parameters from the params file.
        """
        # NOTE(review): the section is named `training_autoencoder` although
        # this method configures time-series training -- confirm the key.
        training_section = self.config.training_autoencoder

        create_directories([training_section.root_dir])

        base_models = self.config.prepare_timeseries_base_model
        transformed_data = self.config.data_transformation
        hyper_params = self.params

        return TrainingTimeseriesConfig(
            root_dir=training_section.root_dir,
            trained_od_model_path=training_section.trained_od_model_path,
            base_od_model_path=base_models.base_od_model_path,
            trained_tensor_model_path=training_section.trained_tensor_model_path,
            base_tensor_model_path=base_models.base_tensor_model_path,
            training_od_data=transformed_data.local_train_od_dir,
            training_tensor_data=transformed_data.local_train_tensor_dir,
            params_od_size=hyper_params.OD_SIZE,
            params_tensor_size=hyper_params.TENSOR_SIZE,
            params_epochs=hyper_params.EPOCHS,
            params_batch_size=hyper_params.BATCH_SIZE,
            learning_rate=hyper_params.LEARNING_RATE,
            validation_ratio=hyper_params.VALIDATION_RATIO,
            params_time_lag=hyper_params.TIME_LAG,
        )

In [5]:
from timeseriesPredictor.utils import model_loss, convert_to_supervised
import keras
import pickle

2023-08-15 14:54:42.788876: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-15 14:54:42.846070: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-15 14:54:42.847270: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
class TrainingTimeseries:
    """Fits the OD and tensor time-series models described by a training config.

    For each of the two data sets the saved base model is loaded, the pickled
    training data is turned into (input, target) windows, the model is fitted
    and the result is saved to the configured output path.
    """

    def __init__(self, config: "TrainingTimeseriesConfig"):
        """Store the training configuration.

        Args:
            config: paths and hyper-parameters used by ``train``.
        """
        self.config = config

    @staticmethod
    def get_base_model(path: Path):
        """Load and return the Keras model saved at ``path``."""
        return keras.models.load_model(path)

    @staticmethod
    def load_pickle_file(path: Path):
        """Return the object stored in the pickle file at ``path``.

        The file is opened in a ``with`` block so the handle is closed even
        when unpickling raises.

        SECURITY: ``pickle.load`` can execute arbitrary code while loading;
        only use this on files produced by this project's own pipeline.
        """
        with open(path, 'rb') as file:
            return pickle.load(file)

    @staticmethod
    def save_model(path: Path, model: "keras.Model"):
        """Save ``model`` to ``path`` via ``model.save``."""
        model.save(path)

    def data_preparation(self, path: Path, mat_size):
        """Turn a pickled data set into (input, target) window sequences.

        Args:
            path: pickle file holding an array with at least three axes; the
                second and third axes are flattened into one.
            mat_size: sequence whose first two items give the matrix height
                and width each time step is reshaped to.

        Returns:
            A tuple ``(x, y)`` where ``x`` holds every time step of each
            window except the last and ``y`` holds every step except the
            first, i.e. ``y`` is ``x`` shifted one step ahead.
        """
        data = self.load_pickle_file(path)
        # Flatten each sample's matrix into a single feature vector.
        data = data.reshape(data.shape[0], data.shape[1] * data.shape[2])
        # Only the first return value is used.
        # NOTE(review): presumably convert_to_supervised yields lagged windows
        # of `params_time_lag` steps -- confirm in timeseriesPredictor.utils.
        X, _ = convert_to_supervised(data, self.config.params_time_lag)
        # Restore the matrix layout and add a trailing axis of size 1.
        X = X.reshape(X.shape[0], self.config.params_time_lag, mat_size[0], mat_size[1], 1)
        x = X[:, :-1, :, :]
        y = X[:, 1:, :, :]
        return x, y

    def train(self, callback_list: list):
        """Fit and save the OD model, then the tensor model.

        Args:
            callback_list: Keras callbacks passed to ``model.fit``.

        NOTE(review): the same callback objects are passed to both ``fit``
        calls, so a checkpoint callback writing to a single file path would
        be reused by the second run -- confirm this is intended.
        """
        mat_sizes = [self.config.params_od_size, self.config.params_tensor_size]
        training_data_path = [self.config.training_od_data, self.config.training_tensor_data]
        base_model_paths = [self.config.base_od_model_path, self.config.base_tensor_model_path]
        trained_model_paths = [self.config.trained_od_model_path, self.config.trained_tensor_model_path]

        for mat_size, path, base_model_path, trained_model_path in zip(
                mat_sizes, training_data_path, base_model_paths, trained_model_paths):
            model = self.get_base_model(base_model_path)
            X_train, y_train = self.data_preparation(path, mat_size)

            history = model.fit(
                X_train, y_train,
                validation_split=self.config.validation_ratio,
                epochs=self.config.params_epochs,
                batch_size=self.config.params_batch_size,
                callbacks=callback_list,
            )

            model_loss(history)
            self.save_model(path=trained_model_path, model=model)

In [7]:
import sys
from timeseriesPredictor.exception import CustomException
from timeseriesPredictor.components.prepare_callbacks import PrepareCallbacks 

In [8]:
# Run the whole stage: read the configuration, build the Keras callbacks,
# then fit and save both time-series models.
try:
    config = configurationManeger()
    # Building this config also creates the TensorBoard and checkpoint directories.
    prepare_callbacks_config = config.get_prepare_callbacks_config()
    prepare_callbacks = PrepareCallbacks(config=prepare_callbacks_config)
    callback_list = prepare_callbacks.get_tb_ckpt_es_callbacks()

    # Building this config also creates the training root directory.
    training_config = config.get_timeseries_training_config()
    training = TrainingTimeseries(config= training_config)   
    training.train(callback_list= callback_list)

# Any failure is re-raised wrapped in the project's CustomException.
except Exception as e:
    raise CustomException(e, sys)

 2/17 [==>...........................] - ETA: 13:26 - loss: 0.6840