In [1]:
import os

In [2]:
%pwd

'd:\\End-to-End-Wine-Quality-predidection\\research'

In [3]:
# Step up to the project root only while still inside the notebook folder
# (`research`), so re-running this cell does not climb one directory further
# on every execution.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'd:\\End-to-End-Wine-Quality-predidection'

In [5]:
from dataclasses import dataclass
from pathlib import Path
@dataclass
class DataTransformationConfig:
    """Settings consumed by the data-transformation stage.

    Instances are built by ``ConfigurationManager.get_data_transformation_config``
    from the ``data_transformation`` section of the project configuration.
    """

    # Directory that receives the stage outputs (the train/test CSV files).
    root_dir: Path
    # CSV file that the stage reads as its input dataset.
    data_path: Path


In [6]:
#Configuration Manager

from ML_Project.constants import *
from ML_Project.entity.config_entity import DataIngestionConfig, DataValidationConfig
from ML_Project.utils.common import create_directories


class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects.

    On construction the config, params and schema YAML files are read and the
    top-level artifact directory is created. Each ``get_*_config`` method then
    turns one section of the config file into its matching config entity.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Read the three YAML files and make sure the artifact root exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        artifact_root = Path(self.config.artifact_root)
        create_directories([artifact_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the ingestion config from the ``data_ingestion`` section.

        Values are handed over exactly as read from the YAML (no ``Path``
        wrapping); ``artifact_dir`` comes from the top-level ``artifact_root``.
        """
        section = self.config.data_ingestion
        return DataIngestionConfig(
            root_dir=section.root_dir,
            source_url=section.source_URL,
            local_data_file=section.local_data_file,
            unzip_dir=section.unzip_dir,
            artifact_dir=self.config.artifact_root,
        )

    def get_data_validation_config(self) -> DataValidationConfig:
        """Build the validation config from the ``data_validation`` section.

        The stage's root directory is created before the config is returned,
        and the loaded schema is attached as ``all_schema``.
        """
        section = self.config.data_validation
        stage_dir = Path(section.root_dir)
        create_directories([stage_dir])
        return DataValidationConfig(
            root_dir=stage_dir,
            unzip_data_dir=Path(section.unzip_data_dir),
            STATUS_FILE=Path(section.STATUS_FILE),
            all_schema=self.schema,
        )

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the transformation config from the ``data_transformation`` section.

        The stage's root directory is created before the config is returned.
        """
        section = self.config.data_transformation
        stage_dir = Path(section.root_dir)
        create_directories([stage_dir])
        return DataTransformationConfig(
            root_dir=stage_dir,
            data_path=Path(section.data_path),
        )

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split    
from ML_Project.logging import logger

class DataTransformation:
    """Splits the dataset named in the config into train and test CSV files."""

    def __init__(self, config: DataTransformationConfig):
        """Keep the stage configuration; ``data_path`` and ``root_dir`` are read later."""
        self.config = config

    def train_test_split(self, test_size=0.2, random_state=42):
        """Split the input CSV and write the two parts next to each other.

        Reads ``config.data_path``, splits the rows, and writes
        ``train_data.csv`` and ``test_data.csv`` (without the index column)
        into ``config.root_dir``.

        Args:
            test_size: Forwarded to scikit-learn's ``train_test_split``;
                defaults to the previously hard-coded ``0.2``.
            random_state: Seed forwarded to scikit-learn's
                ``train_test_split``; defaults to the previously hard-coded ``42``.

        Returns:
            ``(train_data, test_data)`` on success. If any step raises, the
            error is logged and ``(None, None)`` is returned instead, so
            callers must check for ``None``.
        """
        # Imported locally under an alias: this method shares its name with
        # scikit-learn's function, and notebook code can rebind the
        # module-level ``train_test_split`` name (for example by assigning
        # this method's result to it), which would break every later call.
        from sklearn.model_selection import train_test_split as _split_frame

        try:
            df = pd.read_csv(self.config.data_path)
            train_data, test_data = _split_frame(
                df, test_size=test_size, random_state=random_state
            )
            train_data.to_csv(os.path.join(self.config.root_dir, "train_data.csv"), index=False)
            test_data.to_csv(os.path.join(self.config.root_dir, "test_data.csv"), index=False)
            logger.info("Train-test split completed successfully.")
            logger.info(f"Train data shape: {train_data.shape}")
            logger.info(f"Test data shape: {test_data.shape}")
            return train_data, test_data
        except Exception as e:
            # Failure is reported through the log and the (None, None) return
            # value rather than by raising.
            logger.error(f"Error occurred while splitting data: {e}")
            return None, None


In [8]:
# The split results get their own names: binding them to `train_test_split`
# would overwrite the scikit-learn function imported earlier in the notebook
# and make a re-run of this cell fail.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()
data_transformation = DataTransformation(config=data_transformation_config)
train_data, test_data = data_transformation.train_test_split()

# DataTransformation.train_test_split logs its own errors and returns
# (None, None) instead of raising, so surface that failure here.
if train_data is None or test_data is None:
    raise RuntimeError("Train-test split failed; see the logged error for details.")

[2025-10-05 17:12:11,723] :INFO:common: Created directory at: artifacts :
[2025-10-05 17:12:11,724] :INFO:common: Created directory at: artifacts\data_transformation :
[2025-10-05 17:12:11,756] :INFO:312214312: Train-test split completed successfully. :
[2025-10-05 17:12:11,756] :INFO:312214312: Train data shape: (914, 13) :
[2025-10-05 17:12:11,756] :INFO:312214312: Test data shape: (229, 13) :
