# Configuration

In [None]:
# Pre-requisite Configuration
# ------------------------------------------------------------------------------ #
# For testing
# ------------------------------------------------------------------------------ #

%load_ext autoreload
%autoreload 2

import os 
from modules.util.config import get_config_by_id, get_config_global
from modules.util.helpers import Logger
from modules.util.database import SQLAlchemyClient


# ------------------------------------------------------------------------------ #
# Configuration
# ------------------------------------------------------------------------------ #

# Identifier of the configuration entry this notebook runs against.
CONFIG_ID = 'CUSTOM_TEST'
# "transform" -> "indicator" section of that configuration entry.
TRANSFORM = get_config_by_id(CONFIG_ID)["transform"]["indicator"]
# Working directory configured for this transform step (mandatory key).
EXTRACTION_DIR = TRANSFORM["directory"]
# Generated reports are written to a "reports" sub-folder of that directory.
REPORTS_DIR = f"{EXTRACTION_DIR}/reports"
# Global (not per-configuration) settings: "indicators" -> "transform".
CONFIG_GLOBAL = get_config_global().get('indicators').get('transform')
# Path of the UDR input file; optional key, so this is None when not set.
UDR_INPUT = TRANSFORM.get("udr_input")

# ------------------------------------------------------------------------------ #
# Create directories
# ------------------------------------------------------------------------------ #
# exist_ok=True makes the call a no-op when the directory already exists and
# avoids the race between a separate existence check and the creation.
# Note: if REPORTS_DIR exists but is a regular file, this fails immediately
# (FileExistsError) instead of failing later when a report is written.
os.makedirs(REPORTS_DIR, exist_ok=True)

# ------------------------------------------------------------------------------ #
# Logger
# ------------------------------------------------------------------------------ #

# Obtain the logger for this configuration id; it is reused by the later cells.
log = Logger.get_logger(CONFIG_ID)

# Banner marking the start of the indicator transform in the log output
# (Logger.blank_line presumably emits an empty separator line - confirm in helpers).
Logger.blank_line(log)
log.info("** TRANSFORM - INDICATORS **")
Logger.blank_line(log)

# Record the resolved working locations so a run can be traced from the log.
log.info(f"Extraction Directory: {EXTRACTION_DIR}")
log.info(f"Reports Directory: {REPORTS_DIR}")

# ------------------------------------------------------------------------------ #
# Check that the UDR input is configured and points to an existing file
# ------------------------------------------------------------------------------ #
# NOTE: only the presence of the file is verified here; its format/content is
# not validated (a malformed file surfaces when it is read).

# A missing/empty configuration value is reported separately so the message
# does not read "UDR file None does not exist".
if not UDR_INPUT:
    log.error("UDR input file is not configured.")
    raise ValueError("UDR input file is not configured")

# isfile() rather than exists(): a directory path would satisfy exists() and
# only fail later, with a less helpful error, when the file is read.
if not os.path.isfile(UDR_INPUT):
    log.error(f"UDR file {UDR_INPUT} does not exist.")
    raise FileNotFoundError(f"UDR file {UDR_INPUT} does not exist")

log.info(f"UDR Input file: {UDR_INPUT}")

# Database client bound to the same configuration id; used by the load cells.
db = SQLAlchemyClient(CONFIG_ID)

# UDR - Load Input

Once the user has filled in the User Decision Report (UDR), it is loaded into a staging table. During the load:

1. If the user has specified an input, the `input` value is considered as `final`.
2. If the user has NOT specified an input, the `proposed` value is considered as `final`.

In [None]:
# UDR: Load
# ------------------------------------------------------------------------------ #

#standard imports
import pandas as pd

#custom imports
from modules.util.helpers import convert_dataframe
from modules.util.database import T_UDR_Indicators

# constants
# Column mapping from the global configuration: internal name -> header used
# in the UDR file (the file headers are renamed back to the internal names below).
cols = CONFIG_GLOBAL.get("udr_columns")
reverse_cols = {val: key for key, val in cols.items()}
file = UDR_INPUT
# Report listing duplicate key combinations, written only when duplicates are found.
file_err = f"{REPORTS_DIR}/UDR_Errors_Duplicates.csv"

Logger.blank_line(log)
log.info("Transform: Load UDR")
Logger.blank_line(log)

# dtype=str keeps every cell as text, so no value is reinterpreted as a number;
# empty cells are read as NaN.
df = pd.read_csv(file, dtype=str)
df = df.rename(columns=reverse_cols)
# The free-text comments column is not loaded. errors='ignore' keeps the load
# working when the column is absent from the file instead of raising KeyError.
df = df.drop(columns=['comments'], errors='ignore')
df = convert_dataframe(df)
log.info(f"{len(df)} records extracted from {file}")

def _strip_if_str(value):
    """Return *value* without surrounding whitespace if it is a string, else unchanged."""
    return value.strip() if isinstance(value, str) else value


def _is_blank(value):
    """Return True for an empty string or a missing value (None / NaN / NA)."""
    return value == '' if isinstance(value, str) else pd.isna(value)


def finalValue(row, propose_col, input_col):
    """Resolve the final value of one UDR field and report where it came from.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) indexable by column name.
        propose_col: Name of the column holding the proposed value.
        input_col: Name of the column holding the user's input.

    Returns:
        A ``(value, source)`` tuple. ``source`` is ``'USER'`` when the input
        column holds a non-blank value, otherwise ``'PROPOSE'``. String values
        are returned stripped of surrounding whitespace.
    """
    user_value = _strip_if_str(row[input_col])
    if not _is_blank(user_value):
        return user_value, 'USER'
    # Fall back to the proposal. A missing proposal is passed through unchanged
    # rather than calling .strip() on it, which would raise AttributeError.
    return _strip_if_str(row[propose_col]), 'PROPOSE'

# (final column, proposed-value column, user-input column) for every field
# whose final value is resolved from the UDR.
columns_to_update = [
    ('APMIndicatorPosition', 'propose_APMIndicatorPosition', 'input_APMIndicatorPosition'),
    ('APMIndicatorCategory', 'propose_APMIndicatorCategory', 'input_APMIndicatorCategory'),
    ('ERPCharacteristic', 'propose_ERPCharacteristic', 'input_ERPCharacteristic'),
]

for col, propose_col, input_col in columns_to_update:
    # Resolve row by row into (value, source) pairs. Building the two columns
    # from plain lists also works for a file with zero data rows, where
    # apply(..., result_type='expand') does not yield a two-column result.
    resolved = [
        finalValue(row, propose_col, input_col)
        for _, row in df.iterrows()
    ]
    df[col] = [value for value, _ in resolved]
    # "<col>_src" records whether the final value came from the user or the proposal.
    df[f"{col}_src"] = [source for _, source in resolved]

# Columns whose combination is checked for uniqueness within the UDR input.
key_cols = [
    'tenantid',
    'externalId',
    'objectType',
    'APMIndicatorPosition',
    'APMIndicatorCategory',
    'ERPCharacteristic'
]

# check for duplicates from the input file
# This runs BEFORE the staging table is touched, so a rejected file does not
# leave the table truncated and empty.
# keep=False flags every member of a duplicate group, not only the repeats.
duplicate_mask = df.duplicated(subset=key_cols, keep=False)
# .copy() gives an independent frame, so adding the comments column below does
# not write into a view of df (SettingWithCopyWarning).
df_duplicates = df.loc[duplicate_mask, key_cols].copy()
df_duplicates['comments'] = 'Duplicate record'
if not df_duplicates.empty:
    df_duplicates.to_csv(file_err, index=False)
    log.error("Duplicate records found in UDR.")
    log.error(f'{file_err} generated')
    raise Exception("Duplicate records found in UDR")

# Input is valid: optionally clear the staging table, then load the new data.
if db.drop_reload:
    db.truncate(T_UDR_Indicators)  # truncate all existing data (remove if needed)

udr_input = SQLAlchemyClient.dataframe_to_object(df, T_UDR_Indicators)
# Nothing is inserted when the conversion yields no objects.
if udr_input:
    db.insert_batches(udr_input)