In [1]:
import os

In [2]:
%pwd

'c:\\Users\\User\\Desktop\\PROJECTS\\stock\\Power-Time-Series-App\\research'

In [3]:
# Step up from research/ to the project root; later cells use paths relative to it.
# Guarded on the current directory name so re-running this cell does not climb
# one level higher each time it is executed.
# NOTE(review): assumes the notebook lives in a folder named "research" — confirm.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\User\\Desktop\\PROJECTS\\stock\\Power-Time-Series-App'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable (frozen) settings for the data-transformation stage."""

    # Directory for this stage; ConfigurationManager creates it before building this object.
    root_dir: Path
    # Path of the CSV file that DataTransformation reads as its input.
    data: Path

In [6]:
from powerTimeSeries.constants import *
from powerTimeSeries.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """Reads the YAML config/params files and hands out per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main config YAML (defaults to CONFIG_FILE_PATH).
            params_filepath: Path of the params YAML (defaults to PARAMS_FILE_PATH).
        """
        # read_yaml's result is accessed by attribute below (e.g. ``.artifacts_root``).
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the DataTransformationConfig from the ``data_transformation`` section.

        The stage's ``root_dir`` is created as a side effect before the config
        object is returned.
        """
        stage_cfg = self.config.data_transformation
        create_directories([stage_cfg.root_dir])

        return DataTransformationConfig(
            root_dir=Path(stage_cfg.root_dir),
            data=Path(stage_cfg.data),
        )


In [8]:
import pandas as pd

In [19]:
class DataTransformation:
    """Adds calendar and lag features to the time-indexed CSV and saves the result."""

    # Column whose past values are used to build the lag features.
    TARGET_COLUMN = 'PJME_MW'

    # Lag column name -> look-back distance. Each distance is a multiple of
    # 364 days (= 52 * 7), so a lagged timestamp falls on the same day of week.
    _LAG_OFFSETS = {
        'lag1': '364 days',
        'lag2': '728 days',
        'lag3': '1092 days',
    }

    def __init__(self, config: "DataTransformationConfig"):
        """Store the stage configuration; only ``config.data`` is read by this class."""
        self.config = config

    def transformation_data(self):
        """Run the whole stage: read the CSV, add features and lags, write the output."""
        df = pd.read_csv(self.config.data)
        df = df.set_index('Datetime')
        # Convert the index to datetimes so the ``.hour`` / ``.dayofweek`` accessors
        # and Timedelta arithmetic used below are available.
        df.index = pd.to_datetime(df.index)

        df = self.create_features(df)
        df = self.add_lags(df)

        self.save(df)

    def create_features(self, df):
        """
        Create time series features based on time series index.

        Args:
            df: DataFrame with a DatetimeIndex.

        Returns:
            A copy of ``df`` with hour, dayofweek, quarter, month, year,
            dayofyear, dayofmonth and weekofyear columns added.
        """
        df = df.copy()
        df['hour'] = df.index.hour
        df['dayofweek'] = df.index.dayofweek
        df['quarter'] = df.index.quarter
        df['month'] = df.index.month
        df['year'] = df.index.year
        df['dayofyear'] = df.index.dayofyear
        df['dayofmonth'] = df.index.day
        df['weekofyear'] = df.index.isocalendar().week

        return df

    def add_lags(self, df):
        """
        Add lag1/lag2/lag3 columns holding the target value 364, 728 and 1092 days earlier.

        Args:
            df: DataFrame with a DatetimeIndex and a ``PJME_MW`` column.

        Returns:
            A copy of ``df`` with the lag columns added. Rows whose lagged
            timestamp is not present in the index get a missing value.
        """
        # Work on a copy so the caller's frame is left untouched, matching
        # create_features.
        df = df.copy()

        # timestamp -> target value; with duplicated timestamps the last one wins.
        target_map = df[self.TARGET_COLUMN].to_dict()
        for lag_name, offset in self._LAG_OFFSETS.items():
            df[lag_name] = (df.index - pd.Timedelta(offset)).map(target_map)

        return df

    def save(self, df):
        """Write ``df`` to artifacts/transformed_data/data.csv and print a completion message."""
        # Create the 'transformed_data' directory
        # NOTE(review): this location is hard-coded and does not use
        # self.config.root_dir — confirm which one downstream stages expect.
        transformed_data_dir = os.path.join("artifacts", "transformed_data")
        os.makedirs(transformed_data_dir, exist_ok=True)

        # Save the DataFrame to the 'data.csv' file inside the 'transformed_data' directory
        # index=False means the Datetime index itself is not written to the file.
        # NOTE(review): confirm that dropping the timestamps is intended.
        data_file_path = os.path.join(transformed_data_dir, "data.csv")
        df.to_csv(data_file_path, index=False)

        print("Data transformation complete")


In [20]:
# Run the data-transformation stage end to end.
# No try/except wrapper: the previous handler only re-raised the same exception,
# so letting errors propagate directly gives the same failure with a cleaner traceback.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()
data_transformation = DataTransformation(config=data_transformation_config)
data_transformation.transformation_data()

[2023-07-23 14:10:46,031: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-07-23 14:10:46,036: INFO: common: yaml file: params.yaml loaded successfully]
[2023-07-23 14:10:46,040: INFO: common: created directory at: artifacts]
[2023-07-23 14:10:46,044: INFO: common: created directory at: artifacts/data_ingestion]
Data transformation complete
