In [9]:
import os 

# Query the kernel's current working directory (IPython line magic).
%pwd

# Switch the working directory to the project root (not a relative "parent"
# directory), presumably so that the project-relative imports and paths used
# by later cells resolve.
# NOTE(review): this is a hard-coded, machine-specific absolute path; os.chdir
# raises if the directory does not exist, so adjust it before running elsewhere.
working_dir = "G:/ML_DL_Projects/MLOPS_Indian_Flight_Price_Prediction"
os.chdir(working_dir)

# Print the directory actually in effect to confirm the change.
print(f"Current working directory is {os.getcwd()} ")

Current working directory is G:\ML_DL_Projects\MLOPS_Indian_Flight_Price_Prediction 


In [10]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class DataTransformationConfig:
    """Settings consumed by the data-transformation stage.

    Attributes:
        root_dir: Directory into which the stage writes its output files.
        data_path: Location of the input CSV that the stage reads.
    """

    root_dir: Path
    data_path: Path

In [11]:
from src.indian_flight_price_prediction.constants import *
from src.indian_flight_price_prediction.utils.common import read_yaml, create_directories

In [12]:
class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_file_path=CONFIG_FILE_PATH,
        params_file_path=PARAMS_FILE_PATH,
        schema_file_path=SCHEMA_FILE_PATH,
    ):
        """Read the config, params and schema YAML files and create the artifacts root.

        Args:
            config_file_path: Path of the main configuration YAML.
            params_file_path: Path of the parameters YAML.
            schema_file_path: Path of the schema YAML.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)
        self.schema = read_yaml(schema_file_path)

        # The top-level artifacts directory must exist before any stage writes to it.
        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Create the stage's root directory and return its configuration.

        Returns:
            A DataTransformationConfig populated from the `data_transformation`
            section of the loaded configuration.
        """
        section = self.config.data_transformation
        create_directories([section.root_dir])

        return DataTransformationConfig(
            root_dir=section.root_dir,
            data_path=section.data_path,
        )

In [13]:
import os
from src.indian_flight_price_prediction.logger import logger
from sklearn.model_selection import train_test_split
import pandas as pd

In [14]:
class DataTransformation:
    """Data-transformation stage: splits the dataset into train and test files."""

    def __init__(self, config: DataTransformationConfig):
        """Store the stage configuration (input CSV path and output directory)."""
        self.config = config

    # Further preprocessing steps (e.g. scaling, PCA) belong in this class, before
    # the data is handed over to the ML training pipeline.

    def train_test_splitting(self, train_size: float = 0.75, random_state: int = 42):
        """Split the input CSV into train/test sets and write them to disk.

        Reads `config.data_path`, splits the rows, and writes `train.csv` and
        `test.csv` into `config.root_dir`. The resulting shapes are logged and
        printed.

        Args:
            train_size: Fraction of rows assigned to the training set; the
                remainder goes to the test set. Defaults to 0.75.
            random_state: Seed forwarded to `train_test_split` so that the
                split is reproducible across runs. Pass `None` to get a
                different random split on every call.
        """
        data = pd.read_csv(self.config.data_path)

        # A fixed seed keeps train.csv / test.csv stable between runs, so that
        # downstream training and evaluation results are comparable.
        train, test = train_test_split(
            data, train_size=train_size, random_state=random_state
        )

        train.to_csv(os.path.join(self.config.root_dir, "train.csv"), index=False)
        test.to_csv(os.path.join(self.config.root_dir, "test.csv"), index=False)

        logger.info("Splited data into training and test split")
        logger.info(train.shape)
        logger.info(test.shape)

        print(train.shape)
        print(test.shape)

In [15]:
# Run the data-transformation stage end to end: load the configuration, build
# the stage object, and write the train/test split to disk.
# Exceptions are deliberately left uncaught: a catch-and-reraise wrapper would
# add nothing except an extra traceback frame.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()
data_transformation = DataTransformation(config=data_transformation_config)
data_transformation.train_test_splitting()

[2025-04-22 14:07:59,174 : INFO : common : yaml file : config\config.yaml loaded successfully.]
[2025-04-22 14:07:59,189 : INFO : common : yaml file : params.yaml loaded successfully.]
[2025-04-22 14:07:59,208 : INFO : common : yaml file : schema.yaml loaded successfully.]
[2025-04-22 14:07:59,211 : INFO : common : Created directory at artifacts]
[2025-04-22 14:07:59,213 : INFO : common : Created directory at artifacts/data_transformation]
[2025-04-22 14:08:03,403 : INFO : 951862613 : Splited data into training and test split]
[2025-04-22 14:08:03,412 : INFO : 951862613 : (155080, 11)]
[2025-04-22 14:08:03,419 : INFO : 951862613 : (51694, 11)]
(155080, 11)
(51694, 11)
