In [1]:
import os

In [2]:
# Move the working directory up one level (presumably to the project root, so
# that relative paths in the config files resolve -- TODO confirm).
# NOTE(review): this is a relative chdir, so it is not idempotent -- re-running
# the cell without restarting the kernel moves up one more level each time.
os.chdir('../')
%pwd

'/home/paladin/Downloads/Bixi-OD-Matrix-Prediction/Bixi-OD-Matrix-Prediction'

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable bundle of filesystem locations used by the data-transformation stage.

    Instances are built by ``configurationManeger.get_data_transformation_config``
    and consumed by ``DataTransformation``. The fields are annotated as ``Path``,
    but a dataclass neither validates nor converts field types, so the values
    are stored exactly as the caller passes them.
    """
    # Root directory of the data-transformation stage.
    root_dir: Path
    # CSV file read by ``DataTransformation.initiate_data_transformation``.
    local_data_file: Path 
    # Output directories for the .png files written from the OD-matrix pipeline.
    local_train_OD_dir: Path
    local_val_OD_dir: Path
    local_test_OD_dir: Path
    # Output directories for the .png files written from the tensor pipeline.
    local_train_tensor_dir: Path
    local_val_tensor_dir: Path
    local_test_tensor_dir: Path

In [4]:
from timeseriesPredictor.utils import create_directories, read_yaml
from timeseriesPredictor.constants import *

In [5]:
class configurationManeger:
    """Load the project's YAML files and build per-stage configuration objects.

    The three YAML files (config, secrets, params) are read once in the
    constructor and kept on the instance as ``self.config``, ``self.secret``
    and ``self.params``.
    """

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 secret_filepath = SECRET_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH):
        """Read the YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            secret_filepath: Path of the secrets YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.secret = read_yaml(secret_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Create the stage's output directories and return its configuration.

        Returns:
            DataTransformationConfig: Locations taken from the
            ``data_transformation`` section of the config; the input file
            comes from the ``data_ingestion`` section. Every value is wrapped
            in ``Path`` so it matches the dataclass annotations.
        """
        config = self.config.data_transformation

        # Each directory is listed exactly once (the original list repeated
        # ``local_train_OD_dir``).
        create_directories([
            config.root_dir,
            config.local_train_OD_dir,
            config.local_val_OD_dir,
            config.local_test_OD_dir,
            config.local_train_tensor_dir,
            config.local_val_tensor_dir,
            config.local_test_tensor_dir,
        ])

        return DataTransformationConfig(
            root_dir=Path(config.root_dir),
            local_data_file=Path(self.config.data_ingestion.local_data_file),
            local_train_OD_dir=Path(config.local_train_OD_dir),
            local_val_OD_dir=Path(config.local_val_OD_dir),
            local_test_OD_dir=Path(config.local_test_OD_dir),
            local_train_tensor_dir=Path(config.local_train_tensor_dir),
            local_val_tensor_dir=Path(config.local_val_tensor_dir),
            local_test_tensor_dir=Path(config.local_test_tensor_dir),
        )

In [6]:
import sys
import os
import pandas as pd
from timeseriesPredictor.utils import OD_tensor_matrix, train_validation_test
from timeseriesPredictor.exception import CustomException
from timeseriesPredictor.logger import logging
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline
from keras.preprocessing.image import save_img
from box import ConfigBox

2023-08-10 18:00:13.807171: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-10 18:00:13.868220: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-10 18:00:13.869407: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
class DataTransformation:
    """Transform the ingested CSV into train/validation/test sets saved as .png files.

    Two scikit-learn pipelines are applied to the same dataframe: one built
    from ``OD_tensor_matrix`` with default arguments and one with
    ``tensor=True``. Each pipeline ends with ``train_validation_test`` and its
    three outputs are written to the directories named in the config.
    """

    def __init__(self, config: DataTransformationConfig):
        """Store the stage configuration.

        Args:
            config: Locations of the input file and the six output directories.
        """
        self.config = config

    @staticmethod
    def _build_pipeline(matrix_kw_args, split_kw_args):
        """Build one matrix-creation + train/val/test-split pipeline."""
        return Pipeline(
            steps=[
                ('matrix_creation',
                 FunctionTransformer(OD_tensor_matrix, kw_args=matrix_kw_args)),
                ('train_val_test_split',
                 # Copy so the two pipelines never share one kwargs dict.
                 FunctionTransformer(train_validation_test, kw_args=dict(split_kw_args))),
            ]
        )

    def get_data_transformer_object(self, train_test_ratio=0.75, train_val_ratio=0.15):
        """Build the OD-matrix and tensor preprocessing pipelines.

        Args:
            train_test_ratio: Forwarded to ``train_validation_test`` under the
                same keyword (default 0.75, the value previously hard-coded).
            train_val_ratio: Forwarded to ``train_validation_test`` under the
                same keyword (default 0.15, the value previously hard-coded).

        Returns:
            ConfigBox: Has the attributes ``OD_matrix_pipeline`` and
            ``tensor_matrix_pipeline``.

        Raises:
            CustomException: If building either pipeline fails.
        """
        try:
            split_kw_args = {'train_test_ratio': train_test_ratio,
                             'train_val_ratio': train_val_ratio}

            return ConfigBox({
                'OD_matrix_pipeline': self._build_pipeline(None, split_kw_args),
                'tensor_matrix_pipeline': self._build_pipeline({'tensor': True}, split_kw_args),
            })

        except Exception as e:
            # Chain explicitly so the original error is reported as the cause.
            raise CustomException(e, sys) from e

    @staticmethod
    def _save_split(matrices, out_dir, start_index):
        """Save each matrix as ``<index>.png`` in ``out_dir``; return the next unused index."""
        next_index = start_index
        for matrix in matrices:
            save_img(os.path.join(out_dir, str(next_index) + '.png'), matrix)
            next_index += 1
        return next_index

    def _transform_and_save(self, pipeline, df, label, train_dir, val_dir, test_dir):
        """Run one pipeline on ``df`` and write its three splits to disk."""
        logging.info(f"Applying {label} preprocessing object on dataframe")
        matrix_train, matrix_val, matrix_test = pipeline.fit_transform(df)

        # File names continue across splits: validation starts where train
        # stopped, and test starts where validation stopped.
        next_index = 0
        for split_name, matrices, out_dir in (('Train', matrix_train, train_dir),
                                              ('Validation', matrix_val, val_dir),
                                              ('Test', matrix_test, test_dir)):
            next_index = self._save_split(matrices, out_dir, next_index)
            logging.info(f"{split_name} {label} set is saved as .png")

    def initiate_data_transformation(self):
        """Read the input CSV, run both pipelines and save every split as .png.

        If the input file does not exist, a warning is logged and nothing else
        is done (best-effort behaviour kept from the original).
        """
        if not os.path.exists(self.config.local_data_file):
            # Logged at WARNING level so the skipped stage is visible even
            # when INFO messages are filtered out.
            logging.warning(f"WARNING: {self.config.local_data_file} does not exist!")
            return

        df = pd.read_csv(self.config.local_data_file)
        logging.info('Read data is completed!')
        df['start_date'] = pd.to_datetime(df['start_date'], format='ISO8601')
        logging.info('Changing object to datetime format is completed!')

        logging.info("Obtaining preprocessing object")
        preprocessing_obj = self.get_data_transformer_object()

        self._transform_and_save(preprocessing_obj.OD_matrix_pipeline, df, 'OD',
                                 self.config.local_train_OD_dir,
                                 self.config.local_val_OD_dir,
                                 self.config.local_test_OD_dir)

        self._transform_and_save(preprocessing_obj.tensor_matrix_pipeline, df, 'tensor',
                                 self.config.local_train_tensor_dir,
                                 self.config.local_val_tensor_dir,
                                 self.config.local_test_tensor_dir)

In [8]:
# Run the data-transformation stage end to end: load the configuration,
# build the stage config, and execute the transformation.
try:
    config = configurationManeger()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.initiate_data_transformation()

except Exception as e:
    # Chain explicitly so the traceback reports the original error as the
    # direct cause instead of "another exception occurred during handling".
    raise CustomException(e, sys) from e

100 64
