# Fill UDR_APM_Indicators.csv

This notebook fills the system-generated file `UDR_APM_Indicators.csv`, which lists all indicators, with values taken from
a custom mapping that is provided as a CSV file. The mapping CSV file must have the following structure:

| Column | name | Data type | Description |
| ------ | ---- | --------- | -------- |
| 1 | PAI_Beschreibung | String | PAI Equipment Description |
| 2 | PAI_Equipment | Int64 | PAI Equipment |
| 3 | PAI_Datenpunkte | String | PAI Indicator |
| 4 | PAI_Indikatorgruppe | String | PAI Indicator Group |
| 5 | Separator | String | Empty Column for better reading |
| 6 | Beschreibung | String | Target Equipment Description APM |
| 7 | Equipment | Int64 | Target Equipment APM |
| 8 | Merkmal | String | Target Characteristic APM |
| 9 | Position | String | Target Position APM |

See CSV file `CUSTOM_MAPPING_SAMPLE.csv` as reference.

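The code in this notebook reads the mapping CSV file by column header name (for example `PAI_Equipment`, `Merkmal`, `Position`), so the header names must match the table above exactly; the column order should follow the table as well.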
Once the tool has filled the customer input values into the UDR report, the result is saved to a new file named
`UDR_APM_Indicators_filled.csv`. Please check carefully that all data was filled in correctly and then replace the
system-generated file with this file (delete the file `UDR_APM_Indicators.csv` and rename the file `UDR_APM_Indicators_filled.csv` to `UDR_APM_Indicators.csv`).

In [None]:
%load_ext autoreload
%autoreload 2

import os
#import sys
import csv

# Determine the workspace root and make it the current working directory so
# that the project-local ``modules`` package can be imported below.
current_dir = os.getcwd()
print(f"current work directory: {current_dir}")

if current_dir.endswith("notebooks"):
    # Already in the expected directory. workspace_root must still be set
    # because the custom mapping path below is built from it.
    workspace_root = current_dir
else:
    # Not in a directory ending with "notebooks": switch to the parent directory.
    workspace_root = os.path.abspath(os.path.join(current_dir, ".."))
    os.chdir(workspace_root)

    # # Add the workspace root to the Python path
    # sys.path.insert(0, workspace_root)

    print(f"changed root work directory to: {workspace_root}")

from modules.util.config import get_config_by_id  # noqa: E402

# Configuration: the indicator transform section of the selected config.
CONFIG_ID = 'CUSTOM_TEST'
TRANSFORM = get_config_by_id(CONFIG_ID)["transform"]["indicator"]
EXTRACTION_DIR = TRANSFORM["directory"]
REPORTS_DIR = f"{EXTRACTION_DIR}/reports"
INDICATOR_CATEGORY = TRANSFORM["defaults"]["apm_default_indicator_category"]

# files
udr_file = f"{REPORTS_DIR}/UDR_APM_Indicators.csv"
# NOTE(review): the file name is spelled "CUSTUM_MAPPING.csv" here, while the
# sample file is called CUSTOM_MAPPING_SAMPLE.csv — confirm the spelling
# matches the actual file on disk.
custom_mapping_path = os.path.join(workspace_root, "custom", "CUSTUM_MAPPING.csv")
output_path = f"{REPORTS_DIR}/UDR_APM_Indicators_filled.csv"

def load_csv(file_path, delimiter=","):
    """Read a CSV file with a header row into a list of row dictionaries.

    Args:
        file_path: Path of the CSV file to read (decoded as UTF-8).
        delimiter: Field separator used in the file.

    Returns:
        A list with one dict per data row, keyed by the header row's
        column names.
    """
    # newline="" disables universal-newline translation so the csv module
    # handles line endings itself; line breaks inside quoted fields are then
    # returned unchanged.
    with open(file_path, mode="r", encoding="utf-8", newline="") as file:
        return list(csv.DictReader(file, delimiter=delimiter))


def save_csv(file_path, data, fieldnames, delimiter=","):
    """Write row dictionaries to a UTF-8 encoded CSV file.

    A header line built from ``fieldnames`` is written first, followed by
    one line per entry of ``data``.

    Args:
        file_path: Path of the file to create or overwrite.
        data: Iterable of dicts whose keys are taken from ``fieldnames``.
        fieldnames: Column names, in output order.
        delimiter: Field separator to use in the output.
    """
    with open(file_path, mode="w", encoding="utf-8", newline="") as out_file:
        csv_writer = csv.DictWriter(
            out_file,
            fieldnames=fieldnames,
            delimiter=delimiter,
        )
        csv_writer.writeheader()
        for record in data:
            csv_writer.writerow(record)


# Main Logic
# Fill the "Input: ..." columns of the UDR report from the custom mapping,
# drop every row that is still missing one of them, and write the result
# to output_path.
udr_data = load_csv(udr_file, delimiter=",")
custom_mapping_data = load_csv(custom_mapping_path, delimiter=";")

try:
    # Index the custom mapping once by (equipment, indicator group, indicator)
    # instead of scanning it for every UDR row. setdefault keeps the first
    # row per key, so the first matching mapping row still wins.
    mapping_by_key = {}
    for custom_row in custom_mapping_data:
        mapping_key = (
            custom_row["PAI_Equipment"],
            custom_row["PAI_Indikatorgruppe"],
            custom_row["PAI_Datenpunkte"],
        )
        mapping_by_key.setdefault(mapping_key, custom_row)

    # Apply the mapping
    total_rows = len(udr_data)
    for row_index, udr_row in enumerate(udr_data, start=1):
        print(f"Processing row {row_index} of {total_rows}")
        lookup_key = (
            udr_row["Internal ID"],  # Internal ID -> PAI_Equipment
            udr_row["Indicator Group"],  # Indicator Group -> PAI_Indikatorgruppe
            udr_row["Indicator"],  # Indicator -> PAI_Datenpunkte
        )
        custom_row = mapping_by_key.get(lookup_key)
        if custom_row is None:
            print(
                f"No mapping found for: {udr_row['Internal ID']} - {udr_row['Indicator']}"
            )
            continue

        # Take over the values from the custom mapping file
        udr_row["Input: APM Indicator Position"] = custom_row["Position"]  # Input: APM Indicator Position -> Position
        udr_row["Input: ERP Characteristic"] = custom_row["Merkmal"]  # Input: ERP Characteristic -> Merkmal
        udr_row["Input: APM Indicator Category"] = INDICATOR_CATEGORY  # Input: APM Indicator Category
        # Single quotes inside the f-strings keep this valid on Python < 3.12.
        print(
            f"Mapping found for: {udr_row['Internal ID']} - "
            f"{udr_row['Indicator Group']} with position "
            f"{custom_row['Position']}"
        )

    # print the number of rows in udr_data
    print(f"Lines of data in udr_data: {len(udr_data)}")

    # Delete all lines where any of the three input columns is empty.
    # "or ''" guards against None values, which csv.DictReader produces for
    # rows that have fewer fields than the header.
    required_inputs = (
        "Input: APM Indicator Position",
        "Input: ERP Characteristic",
        "Input: APM Indicator Category",
    )
    udr_data = [
        row
        for row in udr_data
        if all(str(row[column] or "").strip() for column in required_inputs)
    ]

    print(f"Lines of data in udr_data after cleaning: {len(udr_data)}")

    if udr_data:
        # save the cleaned data to a new CSV file
        save_csv(
            output_path,
            udr_data,
            fieldnames=list(udr_data[0].keys()),
            delimiter=",",
        )
        print(f"Saved cleaned data to {output_path}")
    else:
        # Without any remaining row there is no header to take the column
        # names from, so nothing is written.
        print(f"No rows left after cleaning; nothing saved to {output_path}")
except KeyError as e:
    print(f"Error: Column missing in CSV: {e}")
except Exception as e:
    print(f"Unknown error occured: {e}")


