In [1]:
import os

In [2]:
%pwd

'c:\\Users\\Sanju\\WORKSPACE\\End-to-End-MLOPS-AWS-Deployment-Project\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'c:\\Users\\Sanju\\WORKSPACE\\End-to-End-MLOPS-AWS-Deployment-Project'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataTransformationConfig:
    root_dir: Path
    preprocessor_path: Path
    train_data_path: Path
    test_data_path: Path
    target_column: str

In [6]:
from MLProject.constants import *
from MLProject.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    
    def get_data_transformation_config(self) -> DataTransformationConfig:
        config = self.config.data_transformation
        schema = self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        data_transformation_config = DataTransformationConfig(
            root_dir = config.root_dir,
            preprocessor_path=config.preprocessor_path,
            train_data_path=config.train_data_path,
            test_data_path=config.test_data_path,
            target_column=schema.name
        )

        return data_transformation_config

In [8]:
import os
from MLProject import logger
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
from MLProject.entity.config_entity import DataTransformationConfig
from MLProject.utils.common import save_object
import pandas as pd
import numpy as np
import pickle

In [9]:
class DataTransformation:
    def __init__(self, config: DataTransformationConfig):
        self.config = config

    def get_data_transformation_object(self):
        try:
            Nominal_categorical_cols = ['Drop_point','Dosage_Form','Pickup_Point']
            Ord_categorical_cols = ['Delivery_Status', 'Shipment_Mode']
            Numerical_cols = ['Line_Item_Quantity','Pack_Price','Unit_Price','Weight','Freight_Cost']

            delivery_status = ['Delivered Early', 'On time', 'Delayed']
            shipment_mode = ['Air', 'Truck', 'Air Charter', 'Ocean']

            num_pipeline = Pipeline(
                steps=[
                    ('imputer', SimpleImputer(strategy='median')),
                    ('scalar', StandardScaler())
                ]
            )

            ord_cat_pipeline= Pipeline(
                steps=[
                    ('imputer', SimpleImputer(strategy='most_frequent')),
                    ('ordinalencoder', OrdinalEncoder(categories=[delivery_status, shipment_mode]))
                ]
            )


            nom_cat_pipeline = Pipeline(
                steps=[
                    ('imputer',SimpleImputer(strategy='most_frequent')),
                    ('OneHotEncoder', OneHotEncoder(drop='first'))
                ]
            )

            preprocessor = ColumnTransformer([
                ('num_pipeline', num_pipeline, Numerical_cols),
                ('ord_cat_pipeline', ord_cat_pipeline, Ord_categorical_cols),
                ('nom_cat_pipeline', nom_cat_pipeline, Nominal_categorical_cols)

            ])

            return preprocessor

        except Exception as e:
            raise e
        
    
    def initiate_data_transformation(self):
        try:
            train_df = pd.read_csv(self.config.train_data_path)
            test_df = pd.read_csv(self.config.test_data_path)

            train_df['Drop_point'].replace("Côte d'Ivoire", "Côte d Ivoire")
            test_df['Drop_point'].replace("Côte d'Ivoire", "Côte d Ivoire")

            input_feature_train_df = train_df.drop(columns=['Line_Item_Value'])
            target_feature_train_df = train_df['Line_Item_Value']

            input_feature_test_df = test_df.drop(columns=['Line_Item_Value'])
            target_feature_test_df = test_df['Line_Item_Value']

            preprocessing_obj = self.get_data_transformation_object()

            input_feature_train_arr = pd.DataFrame(preprocessing_obj.fit_transform(input_feature_train_df), columns=preprocessing_obj.get_feature_names_out())
            input_feature_test_arr = pd.DataFrame(preprocessing_obj.transform(input_feature_test_df), columns=preprocessing_obj.get_feature_names_out())

            train_arr = pd.concat([input_feature_train_arr, target_feature_train_df], axis=1)
            test_arr = pd.concat([input_feature_test_arr, target_feature_test_df], axis=1)

            train_arr.columns=['Line_Item_Quantity', 'Pack_Price', 'Unit_Price', 'Weight', 'Freight_Cost', 'Delivery_Status', 'Shipment_Mode', "Drop_point_Côte d Ivoire", 'Drop_point_Ethiopia', 'Drop_point_Guyana', 'Drop_point_Haiti', 'Drop_point_Kenya', 'Drop_point_Mozambique', 'Drop_point_Nigeria', 'Drop_point_Others', 'Drop_point_Rwanda', 'Drop_point_South Africa', 'Drop_point_South_Sudan', 'Drop_point_Tanzania', 'Drop_point_Uganda', 'Drop_point_Vietnam', 'Drop_point_Zambia', 
                               'Drop_point_Zimbabwe', 'Dosage_Form_Tablet', 'Dosage_Form_Tablet - FDC', 'Dosage_Form_Test kit', 'Pickup_Point_Others', 'Line_Item_Value']

            test_arr.columns=['Line_Item_Quantity', 'Pack_Price', 'Unit_Price', 'Weight', 'Freight_Cost', 'Delivery_Status', 'Shipment_Mode', "Drop_point_Côte d Ivoire", 'Drop_point_Ethiopia', 'Drop_point_Guyana', 'Drop_point_Haiti', 'Drop_point_Kenya', 'Drop_point_Mozambique', 'Drop_point_Nigeria', 'Drop_point_Others', 'Drop_point_Rwanda', 'Drop_point_South Africa', 'Drop_point_South_Sudan', 'Drop_point_Tanzania', 'Drop_point_Uganda', 'Drop_point_Vietnam', 'Drop_point_Zambia', 
                              'Drop_point_Zimbabwe', 'Dosage_Form_Tablet', 'Dosage_Form_Tablet - FDC', 'Dosage_Form_Test kit', 'Pickup_Point_Others', 'Line_Item_Value']
        
            train_arr.to_csv(os.path.join(self.config.root_dir, 'trans_train.csv'), index=False)
            test_arr.to_csv(os.path.join(self.config.root_dir, 'trans_test.csv'), index=False)

            save_object(
                file_path=self.config.preprocessor_path,
                obj=preprocessing_obj
            )

            return (
                train_arr,
                test_arr,
                self.config.preprocessor_path
            )
        except Exception as e:
            print("Exception occurred during data transformation:", e)
            raise e

In [10]:
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.get_data_transformation_object()
    data_transformation.initiate_data_transformation() 
except Exception as e:
    raise e

[2024-02-24 03:58:10,637: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-02-24 03:58:10,639: INFO: common: yaml file: params.yaml loaded successfully]
[2024-02-24 03:58:10,641: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-02-24 03:58:10,643: INFO: common: created directory at: artifacts]
[2024-02-24 03:58:10,644: INFO: common: created directory at: artifacts/data_transformation]
num_pipeline__Line_Item_Quantity              0
num_pipeline__Pack_Price                      0
num_pipeline__Unit_Price                      0
num_pipeline__Weight                          0
num_pipeline__Freight_Cost                    0
ord_cat_pipeline__Delivery_Status             0
ord_cat_pipeline__Shipment_Mode               0
nom_cat_pipeline__Drop_point_Côte d'Ivoire    0
nom_cat_pipeline__Drop_point_Ethiopia         0
nom_cat_pipeline__Drop_point_Guyana           0
nom_cat_pipeline__Drop_point_Haiti            0
nom_cat_pipeline__Drop_point_Kenya            0

In [11]:
from pathlib import Path
import pickle

In [12]:
with open(Path(r'C:\Users\Sanju\WORKSPACE\End-to-End-MLOPS-AWS-Deployment-Project\artifacts\models\preprocessor.pkl'), 'rb') as  file:
    preprocessor = pickle.load(file)

In [24]:
f = pd.read_csv(r'C:\Users\Sanju\WORKSPACE\End-to-End-MLOPS-AWS-Deployment-Project\artifacts\data_validation\test.csv')

In [25]:
f.drop(columns=['Line_Item_Value'], inplace=True)

In [46]:
import pandas as pd

data = pd.DataFrame([['Zambia', 'Truck', 'Tablet', 47227, 5.78, 0.19, 4959.0, 9973.51, 535.03, 'Delivered Early', 'India']],
                    columns=['Drop_point', 'Shipment_Mode', 'Dosage_Form', 'Line_Item_Quantity', 'Pack_Price',
                             'Unit_Price', 'Weight', 'Freight_Cost', 'Line_Item_Insurance', 'Delivery_Status',
                             'Pickup_Point'])

print(data)


  Drop_point Shipment_Mode Dosage_Form  Line_Item_Quantity  Pack_Price  \
0     Zambia         Truck      Tablet               47227        5.78   

   Unit_Price  Weight  Freight_Cost  Line_Item_Insurance  Delivery_Status  \
0        0.19  4959.0       9973.51               535.03  Delivered Early   

  Pickup_Point  
0        India  


In [57]:
f.columns

Index(['Drop_point', 'Shipment_Mode', 'Dosage_Form', 'Line_Item_Quantity',
       'Pack_Price', 'Unit_Price', 'Weight', 'Freight_Cost',
       'Line_Item_Insurance', 'Delivery_Status', 'Pickup_Point'],
      dtype='object')

In [None]:
info = ['Zambia','Truck','Tablet',47227,5.78,0.19,4959.0,9973.51,535.03,'Delivered Early','India']

In [58]:
preprocessor.transform(pd.DataFrame([['Zambia','Truck','Tablet',47227,5.78,0.19,4959.0,9973.51,535.03,'Delivered Early','India']], columns=['Drop_point', 'Shipment_Mode', 'Dosage_Form', 'Line_Item_Quantity',
       'Pack_Price', 'Unit_Price', 'Weight', 'Freight_Cost',
       'Line_Item_Insurance', 'Delivery_Status', 'Pickup_Point']))

array([[ 0.72440818, -0.38892298, -0.12025582,  0.20977891,  0.08118985,
         0.        ,  1.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  1.        ,  0.        ,  1.        ,  0.        ,
         0.        ,  0.        ]])