In [76]:
import os 
import sys

In [77]:
# Show the kernel's current working directory before it is changed.
os.getcwd()

'd:\\iNeuron\\Complete Project\\Boston-Regression\\Boston_Housing-Regression'

In [78]:
# Switch the kernel's working directory; presumably this is the project root,
# so that the `src` package and the relative artifact paths used in later
# cells resolve — TODO confirm.
# NOTE(review): hard-coded absolute Windows path; this cell only works on a
# machine with exactly this directory layout.
os.chdir('d:\\iNeuron\\Complete Project\\Boston-Regression\\Boston_Housing-Regression')

In [79]:
# Confirm the working directory after the os.chdir call.
os.getcwd()

'd:\\iNeuron\\Complete Project\\Boston-Regression\\Boston_Housing-Regression'

In [80]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable set of output locations used by the data-transformation step."""

    # File the fitted preprocessing object is saved to.
    preprocessed_obj_file_path: Path
    # CSV file the scaled training split is written to.
    scaled_train_path: Path
    # CSV file the scaled test split is written to.
    scaled_test_path: Path

In [81]:
from src.utils import create_directories, read_yaml
from src.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from src.logger import logging

class ConfigurationManager:
    """Read the project's YAML files and build per-stage configuration objects.

    Attributes:
        config: Object returned by ``read_yaml`` for the main config file. It
            is accessed by attribute (``artifacts_root``,
            ``data_transformation``).
        params: Object returned by ``read_yaml`` for the params file.
    """

    def __init__(self,
                 config_file_path=CONFIG_FILE_PATH,
                 params_file_path=PARAMS_FILE_PATH):
        """Load both YAML files and prepare the artifacts root.

        Args:
            config_file_path: Location of the main configuration YAML.
            params_file_path: Location of the parameters YAML.

        Raises:
            Whatever ``read_yaml`` or ``create_directories`` raise; errors are
            propagated unchanged.
        """
        # No try/except here: the original wrapper only re-raised the same
        # exception, so letting it propagate is equivalent and clearer.
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)
        # Presumably creates the artifacts root directory if it is missing —
        # TODO confirm against src.utils.create_directories.
        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> "DataTransformationConfig":
        """Build the configuration for the data-transformation stage.

        Returns:
            A ``DataTransformationConfig`` whose three fields are taken from the
            ``data_transformation`` section of the loaded config and converted
            to ``pathlib.Path``, matching the types the dataclass declares.
        """
        logging.info('Data Transformation config started.')
        section = self.config.data_transformation

        return DataTransformationConfig(
            preprocessed_obj_file_path=Path(section.preprocessed_obj_file_path),
            scaled_train_path=Path(section.scaled_train_path),
            scaled_test_path=Path(section.scaled_test_path),
        )

In [82]:
from sklearn.preprocessing import StandardScaler
import pandas as pd

from src.utils import save_obj


# Locations of the ingested splits, relative to the parent of the working directory.
train_path = r'Boston_Housing-Regression\artifact\data_ingestion\train.csv'
test_path = r'Boston_Housing-Regression\artifact\data_ingestion\test.csv'

# Resolve both relative locations against the parent of the current working directory.
final_train_path, final_test_path = (
    os.path.join(os.path.dirname(os.getcwd()), rel_path)
    for rel_path in (train_path, test_path)
)


class DataTransformation:
    """Standardize the feature columns of the train/test CSVs and persist the results.

    The scaler is fitted on the training features only and then applied to the
    test features. The fitted scaler and both scaled splits (with the target
    column re-attached) are written to the locations given by ``config``.
    """

    # Label column that is split off before scaling and re-attached afterwards.
    TARGET_COLUMN = 'MEDV'

    def __init__(self, config: "DataTransformationConfig", train_path, test_path):
        """Store the configuration and the input CSV locations.

        Args:
            config: Output locations for the scaler and the scaled splits.
            train_path: CSV file holding the training split.
            test_path: CSV file holding the test split.
        """
        self.config = config
        # Keep the paths on the instance; previously they were assigned to
        # locals and discarded, so the method fell back to notebook globals.
        self.train_path = train_path
        self.test_path = test_path

    @staticmethod
    def _ensure_parent_dir(file_path):
        """Create the directory that will contain ``file_path``, if it names one."""
        parent = os.path.dirname(file_path)
        # A bare filename has an empty dirname, and os.makedirs('') would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)

    def initiate_data_transformation(self):
        """Scale both splits, save the scaler and write the scaled CSVs.

        Returns:
            Tuple ``(scaled_train_path, scaled_test_path)`` taken from the config.

        Raises:
            KeyError: If either CSV lacks the ``MEDV`` column.
            Any I/O error raised while reading the inputs or writing the outputs.
        """
        logging.info('Initiating Data Transformation')
        train_df = pd.read_csv(self.train_path)
        test_df = pd.read_csv(self.test_path)

        train_target = train_df[self.TARGET_COLUMN]
        test_target = test_df[self.TARGET_COLUMN]
        new_train_df = train_df.drop(columns=[self.TARGET_COLUMN])
        new_test_df = test_df.drop(columns=[self.TARGET_COLUMN])

        # Fit on the training features only; the test split reuses those statistics.
        scaler = StandardScaler()
        scaled_train_df = pd.DataFrame(
            scaler.fit_transform(new_train_df),
            columns=new_train_df.columns,
            index=new_train_df.index,  # keep rows aligned with the target for concat
        )
        scaled_test_df = pd.DataFrame(
            scaler.transform(new_test_df),
            columns=new_test_df.columns,
            index=new_test_df.index,
        )

        final_train_df = pd.concat([scaled_train_df, train_target], axis=1)
        final_test_df = pd.concat([scaled_test_df, test_target], axis=1)

        # Make sure every output location has a directory, not just the pickle's.
        for out_path in (self.config.preprocessed_obj_file_path,
                         self.config.scaled_train_path,
                         self.config.scaled_test_path):
            self._ensure_parent_dir(out_path)

        save_obj(self.config.preprocessed_obj_file_path, scaler)

        final_train_df.to_csv(self.config.scaled_train_path, index=False, header=True)
        final_test_df.to_csv(self.config.scaled_test_path, index=False, header=True)
        logging.info('Train and test set has been preprocessed')

        logging.info('Data Transformation completed')
        logging.info('\n')
        return self.config.scaled_train_path, self.config.scaled_test_path

In [83]:
# Load the ingested training split and display its feature columns (target removed).
train_df = pd.read_csv(final_train_path)
new_train_df = train_df.drop(columns=['MEDV'])
new_train_df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.14150,0.0,6.91,0,0.448,6.169,6.6,5.7209,3,233.0,17.9,383.37,5.81
1,0.15445,25.0,5.13,0,0.453,6.145,29.2,7.8148,8,284.0,19.7,390.68,6.86
2,16.81180,0.0,18.10,0,0.700,5.277,98.1,1.4261,24,666.0,20.2,396.90,30.81
3,0.05646,0.0,12.83,0,0.437,6.232,53.7,5.0141,5,398.0,18.7,386.40,12.34
4,8.79212,0.0,18.10,0,0.584,5.565,70.6,2.0635,24,666.0,20.2,3.65,17.16
...,...,...,...,...,...,...,...,...,...,...,...,...,...
399,0.03548,80.0,3.64,0,0.392,5.876,19.1,9.2203,1,315.0,16.4,395.18,9.25
400,0.09164,0.0,10.81,0,0.413,6.065,7.8,5.2873,4,305.0,19.2,390.91,5.52
401,5.87205,0.0,18.10,0,0.693,6.405,96.0,1.6768,24,666.0,20.2,396.90,19.37
402,0.33045,0.0,6.20,0,0.507,6.086,61.5,3.6519,8,307.0,17.4,376.75,10.88


In [84]:
# Locations of the ingested splits, relative to the parent of the working directory.
train_path = r'Boston_Housing-Regression\artifact\data_ingestion\train.csv'
test_path = r'Boston_Housing-Regression\artifact\data_ingestion\test.csv'

# Resolve both relative locations against the parent of the current working directory.
final_train_path, final_test_path = (
    os.path.join(os.path.dirname(os.getcwd()), rel_path)
    for rel_path in (train_path, test_path)
)

# Run the data-transformation step: load config, build the step, execute it.
if __name__=='__main__':
    config = ConfigurationManager()
    data_transforamtion_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(
        config=data_transforamtion_config,
        train_path=final_train_path,
        test_path=final_test_path,
    )
    data_transformation.initiate_data_transformation()

In [85]:
import os
# Show the PYTHONPATH value visible to this kernel process (prints None if unset).
print(os.environ.get('PYTHONPATH'))

D:\iNeuron\Complete Project\Boston-Regression\Boston_Housing-Regression\src


In [86]:
import os
# Raw string: the backslashes in this Windows path are literal. Without the
# `r` prefix, sequences such as `\i`, `\C`, `\B` and `\s` are invalid escapes
# that Python warns about, even though they currently yield the same value.
# NOTE(review): assigning PYTHONPATH here affects child processes only; the
# running interpreter's sys.path was already built at startup.
os.environ['PYTHONPATH'] = r'D:\iNeuron\Complete Project\Boston-Regression\Boston_Housing-Regression\src'

In [87]:
import os
# Re-read PYTHONPATH from the process environment to confirm its current value.
print(os.environ.get('PYTHONPATH'))

D:\iNeuron\Complete Project\Boston-Regression\Boston_Housing-Regression\src


In [88]:
import dill
def load_object(file_path):
    """Return the object deserialized with dill from ``file_path``.

    Args:
        file_path: Location of a file to open in binary mode and pass to
            ``dill.load``.

    Returns:
        Whatever object ``dill.load`` reconstructs from the file.

    Raises:
        Any exception raised while opening or deserializing the file; it is
        propagated to the caller unchanged.
    """
    # NOTE(review): dill, like pickle, can execute arbitrary code while
    # loading; only use this on files from a trusted source.
    with open(file_path, "rb") as stream:
        loaded = dill.load(stream)
    return loaded

In [92]:
# Load the pickled object at the preprocessor path; presumably a check that the
# scaler saved by the data-transformation step can be read back — TODO confirm.
# NOTE(review): absolute machine-specific path, and this cell's execution count
# (92) is out of sequence with the preceding cell (88).
load_object(r'D:\iNeuron\Complete Project\Boston-Regression\Boston_Housing-Regression\artifact\data_transformation\preprocessor_object.pkl')

In [None]:
# Scratch re-run of the scaling steps at notebook level, for interactive inspection.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (final_train_path, final_test_path))

# Separate the target column from the features in each split.
train_target, test_target = train_df['MEDV'], test_df['MEDV']
new_train_df = train_df.drop(columns=['MEDV'])
new_test_df = test_df.drop(columns=['MEDV'])

# Fit the scaler on the training features, then apply it to both splits.
scaler = StandardScaler()
train_scaled_values = scaler.fit_transform(new_train_df)
scaled_train_df = pd.DataFrame(train_scaled_values, columns=new_train_df.columns)
scaled_test_df = pd.DataFrame(scaler.transform(new_test_df), columns=new_test_df.columns)

In [None]:
# Apply the fitted scaler to `df` and display the resulting array.
# NOTE(review): `df` is not assigned in any visible cell, so this relies on
# leftover kernel state and would fail on Restart & Run All — confirm where
# `df` is meant to come from.
scaler.transform(df)

array([[-0.29527447, -0.45260015, -1.47333359,  3.40954542,  3.82847158,
        -7.65395474, -2.34632924, -1.33409531, -0.97702399, -2.38274301,
        -8.04816228, -3.89544325, -1.63421371]])

In [None]:
# Display the value bound to `preprocessor`.
# NOTE(review): `preprocessor` is not assigned in any visible cell; this relies
# on leftover kernel state and would fail on a fresh kernel.
preprocessor

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.399340,0.923185,-1.300817,-0.293294,-0.694015,0.842866,0.082879,-0.303729,-0.284942,-1.073665,-0.017821,0.434727,-0.728209
1,-0.401955,1.869037,-1.066897,-0.293294,-0.591232,0.620603,-0.404365,0.899742,-0.515636,-0.196998,-0.387032,0.434727,-0.776769
2,-0.379642,-0.495593,-0.609290,-0.293294,-0.899583,-0.346892,0.615692,0.879585,-0.746330,-1.008508,-0.248578,0.389227,0.835448
3,-0.398402,0.041823,-0.732098,-0.293294,-1.233630,-0.567702,-1.631238,1.261294,-0.630983,-0.345084,0.212936,0.427180,-0.649124
4,-0.002106,-0.495593,1.026692,-0.293294,1.858449,-1.317293,0.990765,-0.813129,1.675959,1.556337,0.812904,0.258523,-0.359147
...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,-0.400138,0.708219,0.579319,-0.293294,-0.762538,0.251619,-0.502515,-0.075629,-0.630983,-0.789341,-0.110124,0.414055,-0.640799
98,-0.370221,-0.495593,-0.713092,-0.293294,-0.419925,2.901337,0.363307,-0.439824,-0.169594,-0.570174,-0.479335,0.305117,-1.198554
99,-0.392924,-0.495593,-0.368059,-0.293294,-0.282880,-0.107197,-0.474472,-0.494866,-0.515636,-0.114070,1.135964,0.397430,0.030727
100,0.935890,-0.495593,1.026692,-0.293294,0.265300,-0.627263,-0.288688,-0.863168,1.675959,1.556337,0.812904,-3.636771,0.403951


In [None]:
# Display the value bound to `preprocessor`.
# NOTE(review): exact repeat of an earlier cell with the same output, and
# `preprocessor` is not assigned in any visible cell — candidate for removal.
preprocessor

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.399340,0.923185,-1.300817,-0.293294,-0.694015,0.842866,0.082879,-0.303729,-0.284942,-1.073665,-0.017821,0.434727,-0.728209
1,-0.401955,1.869037,-1.066897,-0.293294,-0.591232,0.620603,-0.404365,0.899742,-0.515636,-0.196998,-0.387032,0.434727,-0.776769
2,-0.379642,-0.495593,-0.609290,-0.293294,-0.899583,-0.346892,0.615692,0.879585,-0.746330,-1.008508,-0.248578,0.389227,0.835448
3,-0.398402,0.041823,-0.732098,-0.293294,-1.233630,-0.567702,-1.631238,1.261294,-0.630983,-0.345084,0.212936,0.427180,-0.649124
4,-0.002106,-0.495593,1.026692,-0.293294,1.858449,-1.317293,0.990765,-0.813129,1.675959,1.556337,0.812904,0.258523,-0.359147
...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,-0.400138,0.708219,0.579319,-0.293294,-0.762538,0.251619,-0.502515,-0.075629,-0.630983,-0.789341,-0.110124,0.414055,-0.640799
98,-0.370221,-0.495593,-0.713092,-0.293294,-0.419925,2.901337,0.363307,-0.439824,-0.169594,-0.570174,-0.479335,0.305117,-1.198554
99,-0.392924,-0.495593,-0.368059,-0.293294,-0.282880,-0.107197,-0.474472,-0.494866,-0.515636,-0.114070,1.135964,0.397430,0.030727
100,0.935890,-0.495593,1.026692,-0.293294,0.265300,-0.627263,-0.288688,-0.863168,1.675959,1.556337,0.812904,-3.636771,0.403951
