## Configuration

In [1]:
# One row per canonical column: (internal name, source column name, declared type).
# Both 'column_mappings' and 'column_types' below are derived from this table,
# so the two dictionaries always share the same keys in the same order.
# NOTE(review): the source names are presumably the headers of 'input_file' - confirm.
_COLUMN_SPECS = [
    ('in_dt', 'fecha', 'date'),
    ('in_trans_id', 'trans_id', 'str'),
    ('in_product', 'producto', 'str'),
    ('in_description', 'glosa', 'str'),
    ('in_cost', 'costo', 'float'),
    ('in_price', 'precio', 'float'),
    ('in_quantity', 'cantidad', 'float'),
    ('in_total', 'total', 'float'),
    ('in_customer_id', 'customer_id', 'str'),
    ('in_customer_name', 'customer_name', 'str'),
    ('in_customer_location', 'customer_location', 'str'),
]

khujta_cfg = {
    # Project settings
    'input_file': 'data/test_client/comercializadora_transactions_10.csv',  # Input CSV file
    'out_dir': 'data',
    'client': 'test_client',

    # Analysis settings
    'analysis_dt': '2024-12-02',
    'language': 'EN',

    # Logging: one of 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
    'log_level': 'DEBUG',

    # Column mappings: internal name -> source column name
    'column_mappings': {internal: source for internal, source, _ in _COLUMN_SPECS},

    # Column types: internal name -> declared type
    'column_types': {internal: declared for internal, _, declared in _COLUMN_SPECS},
}

## Logging and output

In [2]:
import os

from src.logger import setup_logging

# Configure logging from the notebook config; falls back to 'INFO' when
# 'log_level' is not present in khujta_cfg.
setup_logging(
    log_level=khujta_cfg.get('log_level', 'INFO'),
    config=khujta_cfg,
)

# Per-client output folder (<out_dir>/<client>); created if it does not exist yet.
out_path = f"{khujta_cfg['out_dir']}/{khujta_cfg['client']}"
os.makedirs(out_path, exist_ok=True)

📝 Run instance ID: test_client_20251008_110327 - Logging [DEBUG] to: logs\test_client_20251008_110327.log


## Play

## Play 2

## Preprocess

In [4]:
from src.khujta import Khujta
# Import the step function under an alias: the name `preprocess_data` is used
# below for the pipeline *result*, and binding both the function and its output
# to one name makes the cell hard to read and easy to misuse when edited.
from src.preprocessing import preprocess_data as preprocess_step

# Build a pipeline with a single "preprocessing" step.
preprocess = Khujta(khujta_cfg).add_step("preprocessing", preprocess_step)

# Run the pipeline. Later cells call .to_csv() on this result and pass it to
# calc_datasets, so it is presumably a pandas DataFrame - TODO confirm.
preprocess_data = preprocess.run()

### Attribn

In [5]:
# Persist the preprocessed table next to the other client outputs (index omitted),
# then show it as the cell result.
preprocessed_csv = f"{out_path}/preprocessed_data.csv"
preprocess_data.to_csv(preprocessed_csv, index=False)
preprocess_data

Unnamed: 0,in_trans_id,in_dt,in_product,in_description,in_cost,in_total,in_quantity,in_customer_id,in_customer_name,in_customer_location
0,AS000001_1,2024-12-01 10:58:00,DRY007,HARINA TEMPURA ESPECIAL 1KG,2200.0,6641.0,2.0,B017,PENSION VOLCAN,Temuco
1,AS000001_2,2024-12-01 10:58:00,FROZ007,PAPAS FRITAS CORTE BASTÓN 2.5KG,2200.0,3075.0,1.0,B017,PENSION VOLCAN,Temuco
2,AS000001_3,2024-12-01 10:58:00,FROZ003,BARRITAS MERLUZA EMPANADAS 1KG,4200.0,5966.0,1.0,B017,PENSION VOLCAN,Temuco
3,AS000001_4,2024-12-01 10:58:00,SUSH008,SALSA SOYA KIKKOMAN 1L,3200.0,4320.0,1.0,B017,PENSION VOLCAN,Temuco
4,AS000001_5,2024-12-01 10:58:00,SUSH008,SALSA SOYA KIKKOMAN 1L,3200.0,4327.0,1.0,B017,PENSION VOLCAN,Temuco
5,AS000003_1,2024-12-01 11:14:00,SUSH004,ARROZ SUSHI KOSHIHIKARI 5KG,12000.0,16929.0,1.0,R010,PARRILLA DEL LAGO PREMIUM,Villarrica
6,AS000023_12,2024-12-01 11:44:00,FROZ003,BARRITAS MERLUZA EMPANADAS 1KG,4200.0,29377.0,5.0,R004,MARISQUERIA EL MUELLE,Villarrica
7,AS000024_1,2024-12-01 10:06:00,FRESH004,CENTOLLA FRESCA KG,15000.0,60772.0,3.0,R005,SUSHI TOKYO TEMUCO,Temuco


## Features

### Filters

In [6]:
import numpy as np
import pandas as pd

## CUSTOM - INI ***
# Small float constant (1e-7). It is not referenced by any cell visible in this
# notebook; presumably a tolerance for float comparisons - TODO confirm.
FLOATPRECISION = 1e-7
## CUSTOM - END ***

def hour(in_dt):
    """Return the hour component of ``in_dt``.

    The value is converted with ``pd.Timestamp`` and its ``.hour`` attribute
    is returned.

    Args:
        in_dt: Any value accepted by ``pd.Timestamp``.

    Returns:
        The ``.hour`` attribute of the resulting timestamp.
    """
    # NOTE(review): the parameter name is kept as-is; the dependency resolver's
    # output lists 'in_dt' under 'in_cols', so it appears to be read from this
    # signature - confirm before renaming.
    return  pd.Timestamp(in_dt).hour

### Attributes

In [7]:
import numpy as np
import pandas as pd

## CUSTOM - INI ***
# Same value as the FLOATPRECISION defined in the Filters cell; repeated here so
# this cell can be run on its own. Not referenced by any cell visible in this
# notebook; presumably a tolerance for float comparisons - TODO confirm.
FLOATPRECISION = 1e-7

def total_transacciones(in_trans_id):
    """Count the transaction ids that are present (not missing).

    This matches the ``'trans_id': 'count'`` aggregation kept as a reference
    in the commented block further down this cell. The previous
    implementation counted *truthy* values instead, so a missing id stored as
    NaN was counted while a legitimate id equal to ``0`` or ``""`` was not.

    Args:
        in_trans_id: Transaction ids for one group (Series, array or list).

    Returns:
        Number of entries that are neither ``None`` nor NaN/NaT.
    """
    # pd.notna yields one boolean per entry; counting the True values gives the
    # number of non-missing ids regardless of their own truthiness.
    return np.count_nonzero(pd.notna(in_trans_id))

def total_unidades_vendidas(in_quantity):
    """Return the sum of ``in_quantity`` as computed by ``np.sum``.

    Args:
        in_quantity: Quantities to add up; anything ``np.sum`` accepts.

    Returns:
        The result of ``np.sum(in_quantity)``.
    """
    # NOTE(review): the parameter name is kept as-is; the dependency resolver's
    # output lists 'in_quantity' under 'in_cols', so it appears to be read from
    # this signature - confirm before renaming.
    return np.sum(in_quantity)


# product_metrics = df.groupby('producto_clean').agg({
#     'trans_id': 'count',
#     'cantidad': 'sum',
#     'total': 'sum',
#     'costo': 'sum',
#     'fecha': ['min', 'max']
# }).reset_index()

# product_metrics.columns = [
#     'producto', 'total_transacciones', 'total_unidades_vendidas',
#     'total_ingresos', 'total_costo', 'primera_venta', 'ultima_venta'
# ]

# product_metrics['dias_activo'] = (
#     product_metrics['ultima_venta'] - product_metrics['primera_venta']
# ).dt.days + 1

## Execution

In [8]:
from src.fidx import get_dependencies

# Feature name -> function computing it. Candidate features that are not enabled
# yet are kept as comments for reference.
pm_features = {
    'hour': hour,
    'total_transacciones': total_transacciones,
    'total_unidades_vendidas': total_unidades_vendidas,
    # 'total_quantity': (['in_quantity'], sum, True),
    # 'total_sales': (['in_total'], sum, True),
    # 'avg_price': (['in_price'], lambda x: sum(x)/len(x) if len(x) > 0 else 0, True),
    # 'min_price': (['in_price'], min, True),
    # 'max_price': (['in_price'], max, True),
    # 'num_transactions':
}

# Model definition for the per-product statistics.
cfg_PM = {
    'model_name': 'product_stats',
    'group_by': ['in_product'],
    'features': pm_features,
}

# Columns requested from the model, and the feature-store settings.
PM_output_cols = ['hour', 'total_transacciones', 'total_unidades_vendidas']
cfg_fidx = {'type': 'local', 'path': 'feature_store'}

# Resolve dependencies; in the saved output the returned config additionally
# carries 'in_cols', 'exec_seq' and 'out_cols'.
cfg_PM = get_dependencies(
    cfg_fidx,
    cfg_PM['model_name'],
    cfg_PM,
    PM_output_cols,
)
cfg_PM

{'model_name': 'product_stats',
 'group_by': ['in_product'],
 'features': {'hour': <function __main__.hour(in_dt)>,
  'total_transacciones': <function __main__.total_transacciones(in_trans_id)>,
  'total_unidades_vendidas': <function __main__.total_unidades_vendidas(in_quantity)>},
 'in_cols': ['in_dt', 'in_trans_id', 'in_quantity'],
 'exec_seq': ['hour', 'total_transacciones', 'total_unidades_vendidas'],
 'out_cols': ['hour', 'total_transacciones', 'total_unidades_vendidas']}

In [12]:
from src.modeling import calc_datasets

# calc_datasets returns two results: in the saved outputs the first is the
# input rows plus an 'hour' column, the second has one row per 'in_product'.
datasets = calc_datasets(preprocess_data, cfg_PM)
filtrs_data, attrs_data = datasets
filtrs_data


Unnamed: 0,in_trans_id,in_dt,in_product,in_description,in_cost,in_total,in_quantity,in_customer_id,in_customer_name,in_customer_location,hour
0,AS000001_1,2024-12-01 10:58:00,DRY007,HARINA TEMPURA ESPECIAL 1KG,2200.0,6641.0,2.0,B017,PENSION VOLCAN,Temuco,10
1,AS000001_2,2024-12-01 10:58:00,FROZ007,PAPAS FRITAS CORTE BASTÓN 2.5KG,2200.0,3075.0,1.0,B017,PENSION VOLCAN,Temuco,10
2,AS000001_3,2024-12-01 10:58:00,FROZ003,BARRITAS MERLUZA EMPANADAS 1KG,4200.0,5966.0,1.0,B017,PENSION VOLCAN,Temuco,10
3,AS000001_4,2024-12-01 10:58:00,SUSH008,SALSA SOYA KIKKOMAN 1L,3200.0,4320.0,1.0,B017,PENSION VOLCAN,Temuco,10
4,AS000001_5,2024-12-01 10:58:00,SUSH008,SALSA SOYA KIKKOMAN 1L,3200.0,4327.0,1.0,B017,PENSION VOLCAN,Temuco,10
5,AS000003_1,2024-12-01 11:14:00,SUSH004,ARROZ SUSHI KOSHIHIKARI 5KG,12000.0,16929.0,1.0,R010,PARRILLA DEL LAGO PREMIUM,Villarrica,11
6,AS000023_12,2024-12-01 11:44:00,FROZ003,BARRITAS MERLUZA EMPANADAS 1KG,4200.0,29377.0,5.0,R004,MARISQUERIA EL MUELLE,Villarrica,11
7,AS000024_1,2024-12-01 10:06:00,FRESH004,CENTOLLA FRESCA KG,15000.0,60772.0,3.0,R005,SUSHI TOKYO TEMUCO,Temuco,10


In [13]:
# Leave the table as the cell's last expression so Jupyter renders it with its
# rich display instead of the plain-text form produced by print().
attrs_data

  in_product  total_transacciones  total_unidades_vendidas
0     DRY007                    1                      2.0
1   FRESH004                    1                      3.0
2    FROZ003                    2                      6.0
3    FROZ007                    1                      1.0
4    SUSH004                    1                      1.0
5    SUSH008                    2                      2.0
