In [22]:
import os

In [23]:
%pwd

'd:\\Machine Learning\\Wine-Classification'

In [24]:
from pathlib import Path
from dataclasses import dataclass

@dataclass(frozen=True)
class DataTransformationConfig:
      """Immutable settings for the data-transformation stage.

      Attributes:
            root_dir: Directory in which the stage writes its output files.
            data_file: Location of the input CSV that the stage reads.
      """

      root_dir: Path   # output directory for this stage
      data_file: Path  # input dataset consumed by this stage

In [25]:
from src.wineModel.utils import *
from src.wineModel.constants import *


class ConfigurationManager:
      """Load the project's YAML settings and hand out per-stage config objects."""

      def __init__(
            self,
            config_filepath = CONFIG_FILE_PATH,
            params_filepath = PARAMS_FILE_PATH,
            schema_filepath = SCHEMA_FILE_PATH
      ):
            """Read the three YAML files and create the artifacts root directory.

            Args:
                  config_filepath: Path of the main configuration YAML.
                  params_filepath: Path of the parameters YAML.
                  schema_filepath: Path of the schema YAML.
            """
            # Load order is config -> params -> schema.
            self.config = read_yaml(config_filepath)
            self.params = read_yaml(params_filepath)
            self.schema = read_yaml(schema_filepath)

            # Every stage writes beneath the artifacts root, so create it up front.
            create_directory([self.config.artifacts_root])

      def get_data_transformation_config(self) -> DataTransformationConfig:
            """Build the data-transformation settings from the loaded config.

            Creates the stage's ``root_dir`` as a side effect.

            Returns:
                  A ``DataTransformationConfig`` populated from the
                  ``data_transformation`` section of the config file.
            """
            section = self.config.data_transformation
            create_directory([section.root_dir])

            return DataTransformationConfig(
                  root_dir=section.root_dir,
                  data_file=section.data_file,
            )

In [26]:
import os
from src.wineModel import logger
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


class DataTransformation:
      """Standardise the feature columns and write train/validation CSV files.

      The input CSV is read from ``config.data_file``; the outputs ``train.csv``
      and ``test.csv`` are written into ``config.root_dir``.
      """

      # Name of the label column in the input CSV; every other column is a feature.
      _TARGET_COLUMN = "quality"

      def __init__(self, config: DataTransformationConfig):
            """Store the stage configuration (``root_dir`` and ``data_file``)."""
            self.config = config

      def _load_features_and_target(self):
            """Read the input CSV and separate the features from the target column.

            Returns:
                  ``(features, target)`` where both are DataFrames sharing the
                  index of the loaded CSV; ``target`` has the single column
                  ``quality``.
            """
            df = pd.read_csv(self.config.data_file)
            features = df.drop(columns=self._TARGET_COLUMN)
            target = df[[self._TARGET_COLUMN]]
            return features, target

      def scaled_data(self):
            """Return the whole dataset with every feature column standardised.

            The scaler here is fitted on *all* rows, so this output must not be
            split afterwards for model evaluation (the held-out rows would have
            influenced the scaling). ``train_test_split`` therefore does not use
            this method; it is kept for callers that need the full scaled table.

            Returns:
                  ``(x_scaled_df, y_df)``: the standardised features and the
                  single-column ``quality`` DataFrame.
            """
            features, y_df = self._load_features_and_target()
            scaled_values = StandardScaler().fit_transform(features)

            x_scaled_df = pd.DataFrame(scaled_values, columns=features.columns)
            return x_scaled_df, y_df

      def train_test_split(self, test_size=0.25, random_state=42):
            """Split the data, standardise it without leakage, and save both parts.

            The rows are split first; the scaler is then fitted on the training
            rows only and that fitted scaler is applied to the validation rows.
            Fitting on the full dataset before splitting would leak validation
            statistics (mean/variance) into the training features.

            Args:
                  test_size: Fraction of rows held out for validation.
                  random_state: Seed that makes the split reproducible.
            """
            features, target = self._load_features_and_target()

            # Inside a method the bare name resolves to the module-level
            # sklearn function, not to this method of the same name.
            x_train, x_val, y_train, y_val = train_test_split(
                  features, target, test_size=test_size, random_state=random_state
            )

            scaler = StandardScaler()
            # Keep the original row index so the concat below pairs each scaled
            # feature row with its own label instead of aligning on 0..n-1.
            x_train_scaled = pd.DataFrame(
                  scaler.fit_transform(x_train),
                  columns=features.columns,
                  index=x_train.index,
            )
            x_val_scaled = pd.DataFrame(
                  scaler.transform(x_val),
                  columns=features.columns,
                  index=x_val.index,
            )

            train = pd.concat([x_train_scaled, y_train], axis=1)
            test = pd.concat([x_val_scaled, y_val], axis=1)

            train.to_csv(os.path.join(self.config.root_dir, "train.csv"), index=False)
            test.to_csv(os.path.join(self.config.root_dir, "test.csv"), index=False)

            logger.info("Splitted data into train test split")
            logger.info(train.shape)
            logger.info(test.shape)

In [27]:
# Run the data-transformation stage: load the configuration, build the stage
# settings, then split/scale the data and write train.csv / test.csv.
# No try/except wrapper: a handler that only re-raises changes nothing except
# adding a frame to the traceback, so errors are simply allowed to propagate.
config = ConfigurationManager()
data_transformation = config.get_data_transformation_config()
validate = DataTransformation(config=data_transformation)  # name kept for any later cells
validate.train_test_split()

[2026-02-12 17:56:15,058: INFO: __init__: yaml file: config\config.yaml is loaded successfully]
[2026-02-12 17:56:15,064: INFO: __init__: yaml file: params.yaml is loaded successfully]
[2026-02-12 17:56:15,073: INFO: __init__: yaml file: schema.yaml is loaded successfully]
[2026-02-12 17:56:15,078: INFO: __init__: Directory is created at: artifacts]
[2026-02-12 17:56:15,081: INFO: __init__: Directory is created at: artifacts/data_transformation]
[2026-02-12 17:56:15,249: INFO: 2109809977: Splitted data into train test split]
[2026-02-12 17:56:15,257: INFO: 2109809977: (857, 12)]
[2026-02-12 17:56:15,257: INFO: 2109809977: (286, 12)]
