In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
from collections import defaultdict

In [2]:
 # Load the data
# Read both input tables straight from the GCS bucket into a name -> DataFrame
# lookup. Keyword arguments are evaluated left to right, so the training
# balances are fetched before the incidents table.
data = dict(
    eod_balance_training=pd.read_csv(
        "gs://berkabank/production/data/04_processing/eod_balance_training.csv"
    ),
    incidents=pd.read_csv(
        "gs://berkabank/production/data/03_primary/incidents.csv"
    ),
)

In [3]:
# Display the column labels of the end-of-day balance training table.
data["eod_balance_training"].columns

Index(['account_id', 'balance_date', 'end_of_day_balance', 'daily_amount_flow',
       'n_transactions', 'days_since_account_creation', 'low_balance_streak',
       'district_id'],
      dtype='object')

In [8]:
from dataclasses import dataclass
import pandas as pd
from typing import Dict, List, Any, Callable


@dataclass
class IncidentFeatures:
    """Pipeline stage for incident-based features (currently a placeholder).

    The stage stores its inputs but does not read them yet: ``run`` only logs
    progress markers and hands back an empty frame.
    """

    # Incidents table supplied by the caller.
    incidents: pd.DataFrame
    # Column-name mapping for the incidents table; not consulted by run() yet.
    column_mapping: dict

    def run(self):
        """Print start/end markers and return a new, empty DataFrame.

        Returns:
            pd.DataFrame: an empty frame, independent of the stored inputs.
        """
        print("----- Running IncidentFeatures...")
        placeholder = pd.DataFrame()
        print("----- IncidentFeatures completed.")
        return placeholder


@dataclass
class EODBFeatures:
    """Pipeline stage for end-of-day-balance features (currently a placeholder).

    All three inputs are stored on the instance, but ``run`` does not read any
    of them yet: it only logs progress markers and returns an empty frame.
    """

    # End-of-day balance table supplied by the caller.
    eod_balance_training: pd.DataFrame
    # Column-name mapping for the balance table; not consulted by run() yet.
    column_mapping: dict
    # Aggregation configuration; not consulted by run() yet.
    aggregations: dict

    def run(self):
        """Print start/end markers and return a new, empty DataFrame.

        Returns:
            pd.DataFrame: an empty frame, independent of the stored inputs.
        """
        print("----- Running EODBFeatures...")
        placeholder = pd.DataFrame()
        print("----- EODBFeatures completed.")
        return placeholder


@dataclass
class PrimaryFeatures:
    """Pipeline stage that sits on top of the incident and balance stages.

    Both upstream stages are executed eagerly when the instance is created
    (see ``__post_init__``); their results are kept on the instance. ``run``
    itself is still a placeholder and does not use those results.
    """

    # Upstream stage producing incident features.
    incident_features: IncidentFeatures
    # Upstream stage producing end-of-day-balance features.
    eodb_features: EODBFeatures

    def __post_init__(self):
        """Run both upstream stages once and cache their outputs on ``self``."""
        print("--- Initializing PrimaryFeatures...")
        # Store each result before starting the next stage so that a failure
        # in the second stage leaves the first result available.
        incident_result = self.incident_features.run()
        self.incident_features_output = incident_result
        eodb_result = self.eodb_features.run()
        self.eodb_features_output = eodb_result
        print("--- PrimaryFeatures initialized.")

    def run(self):
        """Print start/end markers and return a new, empty DataFrame.

        Returns:
            pd.DataFrame: an empty frame; the cached upstream outputs are not
            read here.
        """
        print("--- Running PrimaryFeatures...")
        placeholder = pd.DataFrame()
        print("--- PrimaryFeatures completed.")
        return placeholder


@dataclass
class DerivedFeatures:
    """Pipeline stage that sits on top of the primary-features stage.

    The upstream stage is executed eagerly when the instance is created (see
    ``__post_init__``) and its result is kept on the instance. ``run`` itself
    is still a placeholder and does not use that result.
    """

    # Upstream stage whose run() is invoked during initialization.
    primary_features: PrimaryFeatures

    def __post_init__(self):
        """Run the upstream stage once and cache its output on ``self``."""
        print("--- Initializing DerivedFeatures...")
        upstream_result = self.primary_features.run()
        self.primary_features_output = upstream_result
        print("--- DerivedFeatures initialized.")

    def run(self):
        """Print start/end markers and return a new, empty DataFrame.

        Returns:
            pd.DataFrame: an empty frame; the cached upstream output is not
            read here.
        """
        print("--- Running DerivedFeatures...")
        placeholder = pd.DataFrame()
        print("--- DerivedFeatures completed.")
        return placeholder


@dataclass
class FeatureEngineering:
    """Top-level entry point of the feature pipeline.

    Holds the already-constructed primary and derived stages and, when run,
    executes the derived stage and returns its result.
    """

    # Quoted (forward-reference) annotations keep this class definable even
    # when the stage classes are declared in a different cell or module.
    # Primary stage; stored for reference, not invoked by run().
    primary_features: "PrimaryFeatures"
    # Derived stage; its run() produces the pipeline's final output.
    derived_features: "DerivedFeatures"

    def run(self):
        """Execute the derived stage and return what it produces.

        Previously the result of ``derived_features.run()`` was discarded and
        this method implicitly returned ``None``, so callers assigning the
        return value never received the computed features.

        Returns:
            Whatever ``self.derived_features.run()`` returns.
        """
        print("Running FeatureEngineering...")
        features = self.derived_features.run()
        print("FeatureEngineering completed.")
        return features

In [9]:
# Per-table column mapping. Every column currently maps to itself, so the
# nested dict is generated from the column lists instead of being spelled out
# pair by pair.
column_mapping = {
    table: {column: column for column in columns}
    for table, columns in (
        (
            "incidents",
            ("account_id", "incident_date", "district_id", "t0", "t1"),
        ),
        (
            "eod_balance_training",
            (
                "account_id",
                "balance_date",
                "end_of_day_balance",
                "daily_amount_flow",
                "n_transactions",
                "days_since_account_creation",
                "low_balance_streak",
                "district_id",
            ),
        ),
    )
}

# Aggregation configuration: window lengths "1".."30" (as strings), the
# aggregation function names, and the columns they are listed for.
aggregations = dict(
    time_periods_days=list(map(str, range(1, 31))),
    functions=["mean", "std", "min", "max", "sum"],
    columns=["end_of_day_balance", "daily_amount_flow", "n_transactions"],
)

In [10]:
# Independent copy of the balance table restricted to the account key, the
# date, and the three numeric columns.
df = data["eod_balance_training"][
    [
        "account_id",
        "balance_date",
        "end_of_day_balance",
        "daily_amount_flow",
        "n_transactions",
    ]
].copy()

In [11]:
# Build the stages from the innermost dependency outwards. Constructing
# PrimaryFeatures and DerivedFeatures already triggers their upstream stages
# via __post_init__.
incident_features = IncidentFeatures(
    incidents=data["incidents"],
    column_mapping=column_mapping["incidents"],
)
eodb_features = EODBFeatures(
    eod_balance_training=data["eod_balance_training"],
    column_mapping=column_mapping["eod_balance_training"],
    aggregations=aggregations,
)
primary_features = PrimaryFeatures(
    incident_features=incident_features,
    eodb_features=eodb_features,
)
derived_features = DerivedFeatures(primary_features=primary_features)

# Run the FeatureEngineering pipeline; `features` holds whatever
# FeatureEngineering.run() returns.
feature_engineering = FeatureEngineering(
    primary_features=primary_features,
    derived_features=derived_features,
)
features = feature_engineering.run()

--- Initializing PrimaryFeatures...
----- Running IncidentFeatures...
----- IncidentFeatures completed.
----- Running EODBFeatures...
----- EODBFeatures completed.
--- PrimaryFeatures initialized.
--- Initializing DerivedFeatures...
--- Running PrimaryFeatures...
--- PrimaryFeatures completed.
--- DerivedFeatures initialized.
Running FeatureEngineering...
--- Running DerivedFeatures...
--- DerivedFeatures completed.
FeatureEngineering completed.
