In [3]:
import os
# IPython magic: display the kernel's current working directory so the reader
# can confirm where the relative paths used below will resolve from.
%pwd

'd:\\Data Science\\END to END Proj\\Fertilizer_Pred_MLOPS'

In [2]:
# Step up from the notebook folder to the project root so that the relative
# "artifacts/..." paths and the `src` package import used below resolve.
# The guard makes the cell idempotent: an unconditional chdir("../") climbs one
# more level every time the cell is re-run, silently breaking every later cell.
# NOTE(review): `src` is used as the project-root marker because later cells
# import `src.Fertilizer_Pred` - confirm the notebook folder has no `src` dir.
if not os.path.isdir("src"):
    os.chdir("../")

In [5]:
import pandas as pd

# Locations of the two raw CSV files, relative to the working directory.
train_csv_path = "artifacts/data_ingestion/Fertilizer_Pred/train.csv"
original_csv_path = "artifacts/data_ingestion/Fertilizer_Pred/Fertilizer Prediction.csv"

# Read both files; the order of the paths matches the unpacking targets.
df_train, df_original = (
    pd.read_csv(csv_path) for csv_path in (train_csv_path, original_csv_path)
)

# Preview the first rows of the training frame.
df_train.head()


Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,0,37,70,36,Clayey,Sugarcane,36,4,5,28-28
1,1,27,69,65,Sandy,Millets,30,6,18,28-28
2,2,29,63,32,Sandy,Millets,24,12,16,17-17-17
3,3,35,62,54,Sandy,Barley,39,12,4,10-26-26
4,4,35,58,43,Red,Paddy,37,2,16,DAP


In [6]:
# Remove the 'id' column from the training frame when it exists;
# errors='ignore' turns the call into a no-op when the column is absent.
df_train = df_train.drop(columns=['id'], errors='ignore')


In [7]:
# For every numeric column of the training frame, add a string-typed copy
# named "cat_<column>" to both frames (train first, then original).
for num_col in df_train.select_dtypes(include=["number"]).columns:
    for frame in (df_train, df_original):
        frame[f"cat_{num_col}"] = frame[num_col].astype(str)


In [8]:
# Add a constant column holding 1 to both frames.
for frame in (df_train, df_original):
    frame["const"] = 1


In [9]:
# Cast every object-typed column of the training frame to pandas' category
# dtype, and cast the same-named column of the original frame as well.
for text_col in df_train.select_dtypes(include=["object"]).columns:
    for frame in (df_train, df_original):
        frame[text_col] = frame[text_col].astype("category")


In [10]:
from sklearn.preprocessing import LabelEncoder

# Split the target column off both frames (pop removes it from the features).
target = df_train.pop("Fertilizer Name")
target_org = df_original.pop("Fertilizer Name")

# Fit the encoder on the labels of BOTH datasets. Fitting on the training
# labels alone makes `le.transform(target_org)` raise ValueError as soon as the
# original dataset contains a fertilizer name that never occurs in train.
# LabelEncoder sorts its classes, so when the original adds no new label the
# resulting codes are exactly the ones a train-only fit would produce.
le = LabelEncoder()
le.fit(pd.concat([target, target_org], ignore_index=True))
target_encoded = le.transform(target)
target_org_encoded = le.transform(target_org)


In [11]:
import joblib
import os

# Make sure the output folder exists before anything is written into it.
os.makedirs("artifacts/data_transformation", exist_ok=True)

# Write the two feature tables followed by the two encoded-label tables,
# each as a CSV without the index column.
for frame, csv_path in (
    (df_train, "artifacts/data_transformation/transformed_train.csv"),
    (df_original, "artifacts/data_transformation/transformed_original.csv"),
    (pd.DataFrame({"target": target_encoded}), "artifacts/data_transformation/train_labels.csv"),
    (pd.DataFrame({"target": target_org_encoded}), "artifacts/data_transformation/original_labels.csv"),
):
    frame.to_csv(csv_path, index=False)

# Persist the fitted label encoder; left as the last expression so the cell
# displays the list of written files returned by joblib.
joblib.dump(le, "artifacts/data_transformation/label_encoder.pkl")


['artifacts/data_transformation/label_encoder.pkl']

In [12]:
## ENTITY
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable set of file-system locations used by the data-transformation stage.

    Instances are built by ``ConfigurationManager.get_data_transformation_config``
    and consumed by ``DataTransformation``.
    """
    # Directory of this stage; the configuration manager creates it.
    root_dir: Path
    # Input CSV with the training data (read with pandas.read_csv).
    train_data_path: Path
    # Input CSV with the original dataset (read with pandas.read_csv).
    original_data_path: Path
    # Output CSV for the transformed training features.
    transformed_train_path: Path
    # Output CSV for the transformed original-dataset features.
    transformed_original_path: Path
    # Output CSV holding the encoded training labels in a "target" column.
    train_label_path: Path
    # Output CSV holding the encoded original-dataset labels in a "target" column.
    original_label_path: Path
    # Output file for the fitted LabelEncoder (written with joblib.dump).
    label_encoder_path: Path

In [13]:
from src.Fertilizer_Pred.utils.common import read_yaml, create_directories
from src.Fertilizer_Pred.constant import CONFIG_FILE_PATH, SCHEMA_FILE_PATH, PARAMS_FILE_PATH

class ConfigurationManager:
    """Load the project's YAML files and build per-stage configuration objects.

    Args:
        config_filepath: Location of the main configuration YAML.
        params_filepath: Location of the parameters YAML.
        schema_filepath: Location of the schema YAML.
    """

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH,
                 schema_filepath=SCHEMA_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # The artifacts root must exist before any stage writes below it.
        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the configuration of the data-transformation stage.

        Creates the stage's root directory as a side effect.

        Returns:
            DataTransformationConfig: the stage's paths, each converted to
            ``pathlib.Path`` so the values match the dataclass annotations
            (dataclasses do not enforce or convert annotated types themselves).
        """
        section = self.config.data_transformation

        create_directories([section.root_dir])

        return DataTransformationConfig(
            root_dir=Path(section.root_dir),
            train_data_path=Path(section.train_data_path),
            original_data_path=Path(section.original_data_path),
            transformed_train_path=Path(section.transformed_train_path),
            transformed_original_path=Path(section.transformed_original_path),
            train_label_path=Path(section.train_label_path),
            original_label_path=Path(section.original_label_path),
            label_encoder_path=Path(section.label_encoder_path),
        )


In [14]:
import pandas as pd
import joblib
import os
from sklearn.preprocessing import LabelEncoder

class DataTransformation:
    """Prepare the train and original fertilizer datasets and persist the results.

    The stage reads the two CSVs named in the configuration, derives extra
    feature columns, label-encodes the ``Fertilizer Name`` target and writes
    the feature tables, the encoded labels and the fitted encoder to the
    configured output paths.
    """

    def __init__(self, config: DataTransformationConfig):
        """Store the stage configuration.

        Args:
            config: Locations of the input CSVs and of every output artifact.
        """
        self.config = config

    @staticmethod
    def _ensure_parent_dir(path) -> None:
        """Create the directory that will contain ``path`` if it is missing."""
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def transform(self):
        """Run the whole transformation and write all artifacts.

        Steps, in order: drop ``id`` from the training frame, add a string
        copy (``cat_<col>``) of every numeric training column to both frames,
        add a ``const`` column of ones, cast object columns to ``category``,
        split off and label-encode the target, then save everything.

        Raises:
            KeyError: if the original dataset lacks a numeric column of the
                training data, or either dataset lacks ``Fertilizer Name``.
        """
        df_train = pd.read_csv(self.config.train_data_path)
        df_original = pd.read_csv(self.config.original_data_path)

        # Rebind instead of mutating in place; errors="ignore" makes the drop
        # a no-op when the training data has no 'id' column.
        df_train = df_train.drop(columns=['id'], errors='ignore')

        # Make categorical features from numericals. The column list comes
        # from the training frame, so check up front that the original frame
        # has the same columns and fail with a readable message otherwise.
        numeric_cols = list(df_train.select_dtypes(include=['number']).columns)
        missing = [col for col in numeric_cols if col not in df_original.columns]
        if missing:
            raise KeyError(
                f"Original dataset is missing numeric column(s) present in train: {missing}"
            )
        for col in numeric_cols:
            df_train[f"cat_{col}"] = df_train[col].astype(str)
            df_original[f"cat_{col}"] = df_original[col].astype(str)

        # Add const
        df_train["const"] = 1
        df_original["const"] = 1

        # Convert object to category.
        # NOTE(review): CSV stores no dtype information, so this cast is not
        # preserved in the files written below - consumers must re-cast.
        for col in df_train.select_dtypes(include=['object']).columns:
            df_train[col] = df_train[col].astype("category")
            df_original[col] = df_original[col].astype("category")

        # Encode target
        target = df_train.pop("Fertilizer Name")
        target_org = df_original.pop("Fertilizer Name")

        # Fit on the labels of both datasets so a fertilizer name that only
        # occurs in the original data cannot make transform() raise ValueError.
        # Classes are sorted, so codes are unchanged when no extra label exists.
        le = LabelEncoder()
        le.fit(pd.concat([target, target_org], ignore_index=True))
        target_encoded = le.transform(target)
        target_org_encoded = le.transform(target_org)

        # Save all outputs. Parent folders are created first because only the
        # stage root directory is guaranteed to exist by the configuration step.
        csv_outputs = (
            (df_train, self.config.transformed_train_path),
            (df_original, self.config.transformed_original_path),
            (pd.DataFrame({"target": target_encoded}), self.config.train_label_path),
            (pd.DataFrame({"target": target_org_encoded}), self.config.original_label_path),
        )
        for frame, out_path in csv_outputs:
            self._ensure_parent_dir(out_path)
            frame.to_csv(out_path, index=False)

        self._ensure_parent_dir(self.config.label_encoder_path)
        joblib.dump(le, self.config.label_encoder_path)


In [16]:
# Run the data-transformation stage end to end.
# No try/except wrapper: catching an exception only to `raise e` changes
# nothing about what propagates and merely adds a frame to the traceback.
config = ConfigurationManager()
transformation_config = config.get_data_transformation_config()
transformer = DataTransformation(config=transformation_config)
transformer.transform()


[2025-07-09 13:16:03,419: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-07-09 13:16:03,423: INFO: common: yaml file: params.yaml loaded successfully]
[2025-07-09 13:16:03,428: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-07-09 13:16:03,433: INFO: common: created directory at: artifacts]
[2025-07-09 13:16:03,436: INFO: common: created directory at: artifacts/data_transformation]
