In [1]:
import os
# Move the working directory one level up from wherever the kernel started.
# NOTE: the path is relative, so re-running this cell moves up one more level
# each time; restart the kernel before re-running the notebook.
os.chdir("../")
# IPython magic: show the resulting working directory as the cell output.
%pwd

'd:\\EndToEnd\\MLFLOW\\mlflow-end-to-end-0713'

In [2]:
from dataclasses import dataclass
from pathlib import Path

In [9]:
@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings for the data transformation stage.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Directory that the train/test CSV files are written into.
    root_dir: Path
    # Location of the CSV file that is read and split.
    data_path: Path

In [10]:
from mlproject_0713.constants import *
from mlproject_0713.utils.common import create_directories, read_yaml


class ConfigurationManager:
    """Load the project's YAML files and build per-stage configuration objects.

    On construction the config, schema and params files are read with
    ``read_yaml`` and the directory named by ``config.artifacts_root`` is
    created via ``create_directories``.
    """

    def __init__(self,
                 config_path = CONFIG_FILE_PATH,
                 schema_path = SCHEMA_FILE_PATH,
                 params_path = PARAMS_FILE_PATH):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_path: Path of the main configuration YAML file.
            schema_path: Path of the schema YAML file.
            params_path: Path of the parameters YAML file.
        """
        self.config = read_yaml(config_path)
        self.schema = read_yaml(schema_path)
        self.params = read_yaml(params_path)

        create_directories([Path(self.config.artifacts_root)])

    def get_transformation_config(self) -> DataTransformationConfig:
        """Build the configuration for the data transformation stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            A ``DataTransformationConfig`` populated from the
            ``data_transformation`` section of the loaded config.
        """
        section = self.config.data_transformation

        # Convert once so the dataclass really holds Path objects, as its
        # field annotations declare, instead of the raw YAML values.
        root_dir = Path(section.root_dir)
        create_directories([root_dir])

        return DataTransformationConfig(
            root_dir=root_dir,
            data_path=Path(section.data_path),
        )


In [11]:
import os
from mlproject_0713 import logger
from sklearn.model_selection import train_test_split
import pandas as pd

class DataTransformation:
    """Split the dataset named in the config into train and test CSV files."""

    def __init__(self, config: DataTransformationConfig):
        """Store the stage configuration.

        Args:
            config: Settings providing ``data_path`` (CSV to read) and
                ``root_dir`` (directory the split files are written to).
        """
        self.config = config

    def train_test_split(self, test_size=None, random_state=None):
        """Read the source CSV, split it, and write ``train.csv``/``test.csv``.

        Both files are written into ``self.config.root_dir`` without the
        index column. The shapes of the two splits are logged and printed.

        Args:
            test_size: Forwarded to scikit-learn's ``train_test_split``.
                ``None`` (the default) keeps scikit-learn's default split.
            random_state: Forwarded to scikit-learn's ``train_test_split``.
                Pass an int for a reproducible split; ``None`` (the default)
                gives a different shuffle on every run.
        """
        data = pd.read_csv(self.config.data_path)

        # Inside a method body this bare name resolves to the module-level
        # scikit-learn function, not to this method of the same name.
        train, test = train_test_split(
            data, test_size=test_size, random_state=random_state
        )

        train.to_csv(os.path.join(self.config.root_dir, "train.csv"), index=False)
        test.to_csv(os.path.join(self.config.root_dir, "test.csv"), index=False)

        logger.info("Data Splitted into train and test")
        logger.info(f"Train Shape: {train.shape}")
        logger.info(f"Test Shape: {test.shape}")

        print(train.shape)
        print(test.shape)

In [12]:
# Run the data transformation stage: load configuration, build the stage
# config, then split the dataset into train and test files.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_transformation_config()
    data_transformation = DataTransformation(config = data_transformation_config)
    data_transformation.train_test_split()
except Exception as e:
    # Log the failure with its traceback, then re-raise the same exception
    # so the cell still fails visibly. A bare `raise` keeps the original
    # traceback without adding an extra frame.
    logger.exception(e)
    raise


[2024-07-26 22:37:21,357: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-07-26 22:37:21,361: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-07-26 22:37:21,363: INFO: common: yaml file: params.yaml loaded successfully]
[2024-07-26 22:37:21,364: INFO: common: Created Directory at:artifacts]
[2024-07-26 22:37:21,366: INFO: common: Created Directory at:artifacts\data_transformation]
[2024-07-26 22:37:21,392: INFO: 2298537423: Data Splitted into train and test]
[2024-07-26 22:37:21,394: INFO: 2298537423: Train Shape: (1199, 12)]
[2024-07-26 22:37:21,396: INFO: 2298537423: Test Shape: (400, 12)]
(1199, 12)
(400, 12)
