In [1]:
from dataclasses import dataclass
from typing import Dict, Optional
import pandas as pd

In [None]:
@dataclass
class DataTransformationArtifact:
    """Bundle of the five frames produced by the data transformation stage.

    Fields are positional, so their order is part of the constructor contract:
    users, cards, merchants, dates, transactions.
    """

    # User-level frame.
    users: pd.DataFrame
    # Card-level frame.
    cards: pd.DataFrame
    # Merchant-level frame.
    merchants: pd.DataFrame
    # Calendar/date frame.
    dates: pd.DataFrame
    # Transaction-level frame.
    transactions: pd.DataFrame

: 

In [2]:
import os

# Move the kernel two levels up so the `src.*` imports in later cells resolve
# (presumably this lands on the project root -- confirm for your checkout).
# The hop is relative, so it is guarded: once a `src` directory is visible from
# the current directory, re-running this cell no longer climbs further up.
if not os.path.isdir('src'):
    os.chdir('../../')
os.getcwd()

'c:\\Users\\roser\\OneDrive\\Documentos\\fint'

In [None]:
from datetime import datetime
import os
from src.constants import training_pipeline


class TrainingPipelineConfig:
    """Run-level settings: pipeline name, artifact root and a timestamped run directory."""

    def __init__(self, timestamp: Optional[datetime] = None):
        """Build the configuration for one pipeline run.

        Args:
            timestamp: Moment used to label this run. When omitted, the current
                time at instantiation is used.
        """
        # Resolve the default per call: a `datetime.now()` default in the
        # signature is evaluated only once, when the class is defined, so every
        # instance created later would share the same stale timestamp.
        if timestamp is None:
            timestamp = datetime.now()
        # month_day_year_hour_minute_second; `%S` is the seconds directive.
        timestamp = timestamp.strftime('%m_%d_%Y_%H_%M_%S')
        self.pipeline_name = training_pipeline.PIPELINE_NAME
        self.artifact_name = training_pipeline.ARTIFACT_DIR
        # Each run writes under <artifact root>/<timestamp>.
        self.artifact_dir = os.path.join(self.artifact_name, timestamp)
        self.model_dir = os.path.join('final_model')
        # Stored as the formatted string, not the datetime object.
        self.timestamp: str = timestamp

class DataTransformationConfig:
    """Locations and names used by the data transformation stage."""

    def __init__(self, training_pipeline_config:TrainingPipelineConfig):
        """Derive the stage settings from the run-level configuration.

        Args:
            training_pipeline_config: Run configuration; only its
                ``artifact_dir`` is read here.
        """
        # Stage directory lives directly under this run's artifact directory.
        stage_dir = os.path.join(
            training_pipeline_config.artifact_dir,
            training_pipeline.DATA_TRANSFORMATION_DIR_NAME,
        )
        self.data_transformation: str = stage_dir
        self.table_name: str = training_pipeline.DATA_TABLE_NAME

        # The two values below are copied from the constants module as-is;
        # they are not joined onto the stage directory here.
        transformed_data_dir = training_pipeline.DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR
        transformed_object_dir = training_pipeline.DATA_TRANSFORMATION_TRANSFORMED_OBJECT_DIR
        self.data_transformation_transformed_data_dir: str = transformed_data_dir
        self.data_transformation_transformed_object_dir: str = transformed_object_dir

        



In [None]:
import sys
import os
import numpy as np
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.pipeline import Pipeline

from src.entity.artifact_entity import (
    DataValidationArtifact
)

from src.exception.exception import FintechException
from src.logging.logger import logging
from src.utils.common import users_table,cards_table,transactions_table,merchants_table,date_table


from conn import engine



In [None]:
class DataTransformation:
    """Create the database tables and load the users, cards, merchants, date and
    transaction frames into them.
    """

    def __init__(self, data_validation_artifact: DataValidationArtifact, data_transformation_config: DataTransformationConfig):
        """Keep references to the upstream artifact and this stage's configuration.

        Args:
            data_validation_artifact: Stored on the instance; not read by the
                methods of this class.
            data_transformation_config: Stored on the instance; not read by the
                methods of this class.
        """
        self.data_validation_artifact: DataValidationArtifact = data_validation_artifact
        self.data_transformation_config: DataTransformationConfig = data_transformation_config

    def create_database_conn(self):
        """Execute the five table statements imported from ``src.utils.common``.

        The statements run in the order users, cards, merchants, transactions,
        date (presumably CREATE TABLE statements -- confirm in ``src.utils.common``).

        Raises:
            FintechException: If connecting or executing any statement fails.
        """
        try:
            # engine.begin() commits when the block exits cleanly and rolls back
            # on error; a bare engine.connect() leaves the statements uncommitted
            # on SQLAlchemy 2.x.
            with engine.begin() as connection:
                for statement in (users_table, cards_table, merchants_table, transactions_table, date_table):
                    connection.execute(statement)
        except Exception as e:
            raise FintechException(e, sys)

    def upload_to_db(self, df: Dict[str, pd.DataFrame]) -> DataTransformationArtifact:
        """Split the input frames into dimension/fact frames and write them to the database.

        Args:
            df: Mapping that holds the frames under the keys ``'users'``,
                ``'cards'`` and ``'transactions'``. The ``date`` column of the
                transactions frame must be datetime-like, because the ``.dt``
                accessor is used on it.

        Returns:
            DataTransformationArtifact holding the five frames that were written.

        Raises:
            FintechException: If any of the database writes fails.
            KeyError: If an expected key or column is missing (raised before
                the writes start, so it is not wrapped here).
        """
        users_raw = df['users']
        cards_raw = df['cards']
        transactions_raw = df['transactions']

        df_users = users_raw[["id", "gender", "birth_year", "birth_month", "current_age",
                              "retirement_age", "per_capita_income", "yearly_income",
                              "total_debt", "credit_score", "num_credit_cards"]]

        df_cards = cards_raw[["id", "client_id", "card_brand", "card_type", "has_chip",
                              "num_cards_issued", "credit_limit", "acct_open_date",
                              "year_pin_last_changed", "card_on_dark_web"]]

        # Key the merchant dimension on the merchant identifier, not on the
        # transaction "id": otherwise the table gets one row per transaction and
        # its keys never match fact_transactions.merchant_id.
        # NOTE(review): when one merchant_id appears with several locations only
        # the first one is kept -- confirm that is the intended rule.
        df_merchants = (
            transactions_raw[["merchant_id", "merchant_city", "merchant_state", "zip", "mcc"]]
            .rename(columns={"merchant_id": "id"})
            .drop_duplicates(subset="id")
        )

        # One row per distinct timestamp plus its calendar parts. assign() builds
        # a new frame instead of writing columns onto a derived slice, which
        # avoids SettingWithCopyWarning.
        unique_dates = transactions_raw[["date"]].drop_duplicates()
        date_parts = unique_dates["date"].dt
        df_dates = unique_dates.assign(
            year=date_parts.year,
            month=date_parts.month,
            day=date_parts.day,
            quarter=date_parts.quarter,
            week_of_year=date_parts.isocalendar().week,
        )

        # TODO: 'errors' is uploaded as-is. To store it as a boolean flag instead,
        # derive it with df_transactions["errors"].notna().
        df_transactions = transactions_raw[["id", "client_id", "card_id", "merchant_id",
                                            "date", "amount", "use_chip", "errors"]]

        try:
            df_users.to_sql('dim_users', con=engine, if_exists='append', index=False)
            df_cards.to_sql('dim_cards', con=engine, if_exists='append', index=False)
            df_merchants.to_sql('dim_merchants', con=engine, if_exists='append', index=False)
            # NOTE(review): 'replace' drops and recreates dim_date on every call,
            # unlike the other tables, which are appended to -- confirm this is intended.
            df_dates.to_sql('dim_date', con=engine, if_exists='replace', index=False)
            df_transactions.to_sql('fact_transactions', con=engine, if_exists='append', index=False)
        except Exception as e:
            raise FintechException(e, sys)

        return DataTransformationArtifact(
            users=df_users,
            cards=df_cards,
            merchants=df_merchants,
            dates=df_dates,
            transactions=df_transactions
        )

    def initiate_data_transformation(self, df) -> DataTransformationArtifact:
        """Create the tables, then upload the frames in ``df``.

        Args:
            df: Mapping of frames, passed unchanged to ``upload_to_db``.

        Returns:
            The DataTransformationArtifact produced by ``upload_to_db``.

        Raises:
            FintechException: If table creation or the upload fails.
        """
        logging.info('initiate data transformation and upload to db')
        try:
            logging.info('Create db if are need it')
            self.create_database_conn()
            logging.info('Database created')
            logging.info('uploading to db')
            # upload_to_db already returns the finished artifact; hand it back
            # directly instead of wrapping it in a second artifact.
            return self.upload_to_db(df)

        except Exception as e:
            raise FintechException(e, sys)


NameError: name 'DataTransformationArtifact' is not defined