In [1]:
import os

In [2]:
# Move the working directory up one level so that relative paths used by the
# later cells resolve from the parent directory.
# NOTE(review): this is not idempotent -- every re-run of this cell climbs one
# more directory, so run it exactly once per kernel session.
os.chdir('../')

In [3]:
%pwd

'd:\\Projects\\ML Projects\\End-to-End Wine Quality\\End-to-End-ML-Project'

In [10]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings for the data-transformation stage.

    ``frozen=True`` makes instances read-only after construction.
    """

    # Directory into which the stage writes its output files.
    root_dir: Path
    # Location of the CSV file the stage reads as input.
    data_path: Path

In [11]:
from MLProject.constants import *
from MLProject.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects.

    The parsed config, params and schema documents are kept on the instance
    as ``self.config``, ``self.params`` and ``self.schema``.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH, schema_filepath=SCHEMA_FILE_PATH):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the config object for the data-transformation stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            A ``DataTransformationConfig`` populated from the
            ``data_transformation`` section of the main config.
        """
        section = self.config.data_transformation

        create_directories([section.root_dir])

        # The dataclass declares both fields as ``Path``; convert the raw
        # values read from YAML so the object matches its annotations.
        return DataTransformationConfig(
            root_dir=Path(section.root_dir),
            data_path=Path(section.data_path),
        )

In [13]:
import os
from MLProject import logger
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

class DataTransformation:
    """Standardizes the feature columns, binarizes the target and writes a
    train/test split to ``config.root_dir``.

    The last column of the input CSV is treated as the target; every other
    column is treated as a feature.
    """

    def __init__(self, config: DataTransformationConfig):
        """Store the stage config and load the input CSV into ``self.data``.

        Args:
            config: Stage settings; ``config.data_path`` is read with
                ``pd.read_csv`` immediately.
        """
        self.config = config
        self.data = pd.read_csv(self.config.data_path)

    def Transform(self, test_size=0.2, random_state=101, quality_threshold=6):
        """Split the data, scale the features and write ``train.csv`` / ``test.csv``.

        The split is performed *before* scaling and the scaler is fitted on
        the training rows only, so no statistics of the test rows leak into
        the training features.

        Args:
            test_size: Fraction (or count) of rows held out for testing,
                forwarded to ``train_test_split``.
            random_state: Seed forwarded to ``train_test_split`` so the split
                is reproducible.
            quality_threshold: Target values ``>=`` this become class 1, all
                others class 0.

        Returns:
            None. The two frames are written as CSV files (without index)
            into ``config.root_dir`` and their shapes are logged and printed.
        """
        data = self.data
        target_name = data.columns[-1]
        features = data.drop(columns=[target_name])

        # Vectorized binarization; missing values compare False and map to 0,
        # the same result the element-wise comparison would give.
        labels = (data[target_name] >= quality_threshold).astype("int64")

        x_train, x_test, y_train, y_test = train_test_split(
            features, labels, test_size=test_size, random_state=random_state
        )

        # Fit on the training rows only, then apply the same transform to the
        # held-out rows.
        scaler = StandardScaler()
        train = pd.DataFrame(scaler.fit_transform(x_train), columns=features.columns)
        test = pd.DataFrame(scaler.transform(x_test), columns=features.columns)

        # Attach labels positionally: the scaled frames carry a fresh index,
        # so index-based alignment with the split labels would mismatch.
        train['quality'] = y_train.to_numpy()
        test['quality'] = y_test.to_numpy()

        train.to_csv(os.path.join(self.config.root_dir, "train.csv"), index=False)
        test.to_csv(os.path.join(self.config.root_dir, "test.csv"), index=False)

        logger.info("Standardized and split the data into training and testing sets.")
        logger.info(f"Train shape: {train.shape}")
        logger.info(f"Test shape: {test.shape}")

        print(train.shape)
        print(test.shape)


In [14]:
# Run the data-transformation stage end to end: load the configuration,
# build the stage config, then transform and persist the data.
# Errors propagate unchanged; the previous handler only re-raised the same
# exception and added nothing but an extra traceback frame.
config = ConfigurationManager()
data_transform_config = config.get_data_transformation_config()
data_transformation = DataTransformation(config=data_transform_config)
data_transformation.Transform()

[2025-04-25 19:38:20,036: INFO: common: yaml file: config\config.yaml loaded successfully.]
[2025-04-25 19:38:20,041: INFO: common: yaml file: params.yaml loaded successfully.]
[2025-04-25 19:38:20,043: INFO: common: yaml file: schema.yaml loaded successfully.]
[2025-04-25 19:38:20,049: INFO: common: Created directory at: artifacts]
[2025-04-25 19:38:20,050: INFO: common: Created directory at: artifacts/data_transformation]
[2025-04-25 19:38:20,179: INFO: 3777058894: Standardized and split the data into training and testing sets.]
[2025-04-25 19:38:20,179: INFO: 3777058894: Train shape: (914, 12)]
[2025-04-25 19:38:20,179: INFO: 3777058894: Test shape: (229, 12)]
(914, 12)
(229, 12)
