<a href="https://colab.research.google.com/github/LucasMirandaVS/estudos_python/blob/main/DATA_TRANFORMATION_STEP_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 0. Dependencies

In [None]:
# -*- coding: utf-8 -*-
# Pandas implementation of the ACE report step-by-step (English, numbered steps)

import pandas as pd
import numpy as np
from pathlib import Path

# 1. Dataframe Properties

In [None]:
# Adjust paths as needed
# Workbook loaded by STEP 1 via pd.read_excel. The "/content" prefix is an
# absolute path — presumably the Colab working directory; change it when
# running elsewhere.
INPUT_XLSX = Path("/content/ES-003 Entry Summary Line Tariff Details - Test Version_395602997411203221.xlsx")
SHEET_NAME = "Query 1"  # worksheet/tab name
# Destination path used by the export step (STEP 7). It is relative, so it
# resolves against the current working directory.
OUTPUT_XLSX = Path("ace_report_enriched.xlsx")

# Column names as in the sample spreadsheet
# STEP 2 formats these two columns and joins them with "-" to build the
# "Entry summary number code" key.
COL_ENTRY_NUM = "Entry Summary Number"
COL_LINE_NUM  = "Entry Summary Line Number"

# All requested tariff columns
# Each one is summed per code (STEP 3), re-emitted as "Aggregated <name>"
# (STEP 5), and the original column is then dropped (STEP 6).
TARIFF_COLS = [
    "Line Tariff Duty Amount",
    "Line MPF Amount",
    "Line HMF Amount",
    "Antidumping Duty Amount",
    "Countervailing Duty Amount",
]

# ===== STEP 0.1 — Helpers =====
def fmt_code_part(x: object) -> str:
    """Return one component of the entry code as clean text.

    Spreadsheet identifiers are often read back as floats, which would
    otherwise render as strings like '12345.0'.

    Args:
        x: A raw cell value (number, string, or a missing value).

    Returns:
        "" when ``x`` is missing (per ``pd.isna``); the integer text when
        ``x`` is a whole-valued float or a digit string ending in a single
        ".0"; otherwise ``str(x)`` with surrounding whitespace stripped.
    """
    if pd.isna(x):
        return ""
    if isinstance(x, float) and x.is_integer():
        return str(int(x))
    s = str(x).strip()
    # Remove only the trailing ".0". A global replace would also delete
    # interior occurrences, turning "10.05.0" into "105".
    if s.endswith(".0") and s[:-2].isdigit():
        return s[:-2]
    return s

# ===== STEP 1 — Load data =====
# Reads the SHEET_NAME worksheet of INPUT_XLSX into the module-level `df`;
# every later step mutates or rebinds this DataFrame.
df = pd.read_excel(INPUT_XLSX, sheet_name=SHEET_NAME)

# 2. Dataframe Transformations

In [None]:
# ===== STEP 2 — Build the "Entry summary number code" key =====
# Both identifier columns are normalised to text, then joined as
# "<entry>-<line>".
entry_part = df[COL_ENTRY_NUM].map(fmt_code_part)
line_part = df[COL_LINE_NUM].map(fmt_code_part)
df["Entry summary number code"] = entry_part + "-" + line_part

# ===== STEP 3 — Per-code totals of every tariff column =====
# min_count=1 leaves a total as NaN when a code has no non-missing values,
# instead of reporting it as 0.
totals_by_code = df.groupby("Entry summary number code", dropna=False)[TARIFF_COLS].sum(
    min_count=1
)
# Suffix every column (key included) so nothing collides with df on merge.
agg = totals_by_code.reset_index().rename(columns=lambda name: f"{name}__agg")

# ===== STEP 4 — Attach the totals to every source row =====
df = df.merge(
    agg,
    how="left",
    left_on="Entry summary number code",
    right_on="Entry summary number code__agg",
)
# The suffixed key duplicates the left key after the join.
df = df.drop(columns=["Entry summary number code__agg"])

# ===== STEP 5 — Show each total on exactly one row per code =====
# A stable sort makes rows of the same code contiguous while keeping their
# relative order; the row whose successor has a different code is the last.
df = df.sort_values(by=["Entry summary number code", COL_LINE_NUM], kind="mergesort")
next_code = df["Entry summary number code"].shift(-1)
is_last_of_code = df["Entry summary number code"].ne(next_code)

# The last row of each code carries the total; every other row gets 0.0.
df = df.assign(
    **{
        f"Aggregated {col}": np.where(is_last_of_code, df[f"{col}__agg"], 0.0)
        for col in TARIFF_COLS
    }
)

# ===== STEP 6 — Remove the raw tariff columns and the merged helpers =====
cols_to_drop = [*TARIFF_COLS, *(f"{col}__agg" for col in TARIFF_COLS)]
df = df.drop(columns=cols_to_drop)

# 3. Exporting the df

In [None]:
# ===== STEP 7 — Export =====
# OUTPUT_XLSX carries an .xlsx extension, so write a real Excel workbook.
# Writing CSV text to that path yields a file spreadsheet applications
# cannot open as a workbook.
df.to_excel(OUTPUT_XLSX, index=False)
print(f"File written: {OUTPUT_XLSX.resolve()}")

File written: /content/ace_report_enriched.xlsx
