In [9]:
import pandas as pd
import re
import os

# 1. LOAD MASTER DATA (REPORT AB)
file_master = 'REPORT AB MAR 2025 (acuan nama alat berat).xlsx'
master_lookup = {}  # Map: Alias/Code -> Real Name in Master
master_jenis = {}   # Map: Real Name -> Jenis Alat Berat


def cell_text(value, fallback=""):
    """Return a cell as stripped, upper-cased text, or *fallback* when blank.

    "Blank" covers missing values (None/NaN/NaT) and whitespace-only text, so
    an empty Excel cell never turns into the literal string "NAN".
    """
    if pd.isna(value):
        return fallback
    return str(value).strip().upper() or fallback


def index_master_units(df, alias_cols, jenis_col, lookup, jenis_map):
    """Fill *lookup* and *jenis_map* from the rows of the master sheet *df*.

    Args:
        df: DataFrame holding the master sheet.
        alias_cols: Columns whose values are alternative names/codes of a unit.
        jenis_col: Name of the equipment-type column, or None if absent.
        lookup: Dict updated in place, alias -> real unit name. Each alias is
            stored both with single spaces and with all spaces removed
            (e.g. "L 8084 NQ" and "L8084NQ"). The first row that claims an
            alias keeps it.
        jenis_map: Dict updated in place, real unit name -> equipment type.
    """
    for idx, row in df.iterrows():
        # A blank name gets the same per-row placeholder that is used when the
        # name column is missing, instead of every blank row becoming "NAN".
        real_name = cell_text(row.get('Nama Alat Berat'), fallback=f"ROW {idx}")
        jenis_map[real_name] = cell_text(row.get(jenis_col)) if jenis_col else ""

        for col in alias_cols:
            alias = " ".join(cell_text(row[col]).split())
            if not alias:
                # Blank cell: an empty-string key would match anything that
                # later normalizes to "".
                continue
            lookup.setdefault(alias, real_name)
            lookup.setdefault(alias.replace(" ", ""), real_name)


print(f"Membaca Master Data: {file_master}...")

try:
    df_master = pd.read_excel(file_master, sheet_name=0)

    # Columns for aliases (only those actually present in the sheet are used)
    target_cols = ['Nama Alat Berat', 'Kode Lama', 'Kode Baru', 'No. Polisi Lama', 'No. Polisi Baru']
    existing_cols = [c for c in target_cols if c in df_master.columns]

    # Column for filtering types
    col_jenis = 'Jenis Alat Berat' if 'Jenis Alat Berat' in df_master.columns else None

    index_master_units(df_master, existing_cols, col_jenis, master_lookup, master_jenis)

    print(f"Master Data Loaded. {len(master_jenis)} unique units indexed.")

except Exception as e:
    # Best effort: report the problem and continue with an empty master so the
    # rest of the script still runs.
    print(f"Gagal membaca file Master: {e}")
    df_master = pd.DataFrame()

# 2. LOAD BBM DATA
file_bbm = 'BBM AAB.xlsx'
bbm_raw_names = set()
# NOTE(review): no 'DES' sheet is listed - confirm December is intentionally excluded.
target_sheets = ['JAN', 'FEB', 'MAR', 'APR', 'MEI', 'JUN', 'JUL', 'AGT', 'SEP', 'OKT', 'NOV']

# Header labels that are not unit names; matched anywhere in the cell text.
BBM_HEADER_LABELS = ('UNNAMED', 'EQUIP NAME', 'TANGGAL', 'GROUP KPI', 'TOTAL')
# Text forms of a missing value. Matched exactly: as substrings they would
# also reject real unit names such as "...TERNATE" or ".../KENANGA".
BBM_BLANK_TEXT = frozenset({'NAN', 'NAT'})


def normalize_bbm_header(item):
    """Return the normalized unit name for a BBM header cell, or None.

    The name is upper-cased with runs of whitespace collapsed to one space.
    None is returned for missing cells, blank text, and header labels that
    are not unit names.
    """
    if pd.isna(item):
        return None
    name = " ".join(str(item).upper().split())
    if not name or name in BBM_BLANK_TEXT:
        return None
    if any(label in name for label in BBM_HEADER_LABELS):
        return None
    return name


print(f"\nMembaca File BBM: {file_bbm}...")

if os.path.exists(file_bbm):
    try:
        # The context manager closes the workbook handle once all sheets are read.
        with pd.ExcelFile(file_bbm) as xls:
            for sheet in target_sheets:
                if sheet not in xls.sheet_names:
                    continue
                # Only the first row is read; unit names are taken from it.
                df_sheet = pd.read_excel(xls, sheet_name=sheet, header=None, nrows=1)
                if df_sheet.empty:
                    # An empty sheet must not abort the remaining sheets.
                    continue
                for item in df_sheet.iloc[0].tolist():
                    name = normalize_bbm_header(item)
                    if name is not None:
                        bbm_raw_names.add(name)
    except Exception as e:
        # Best effort: report and continue with whatever was collected.
        print(f"Error BBM: {e}")
else:
    print("File BBM not found.")

print(f"Ditemukan {len(bbm_raw_names)} nama unit unik di BBM.")

# 3. MATCHING LOGIC
# Each BBM name is resolved against master_lookup by trying keys in a fixed
# priority order; the first key present in the lookup wins.
matched_master_units = set()
unmatched_bbm_names = []

# Pattern: NAME (EX OLD_NAME) -> group 1 = NAME, group 2 = OLD_NAME
ex_alias_pattern = re.compile(r"^(.*?)\s*\((?:EX[\.\s]*)(.*?)\)")

for bbm_name in bbm_raw_names:
    # A/B. The name as-is, then with all spaces removed.
    keys = [bbm_name, bbm_name.replace(" ", "")]
    hit = next((key for key in keys if key in master_lookup), None)

    # C. Only when both direct forms fail: split "NAME (EX OLD_NAME)" and try
    #    new name, old name, then the no-space form of each.
    if hit is None:
        ex_match = ex_alias_pattern.search(bbm_name)
        if ex_match is not None:
            current, former = (part.strip() for part in ex_match.groups())
            keys = [
                current,
                former,
                current.replace(" ", ""),
                former.replace(" ", ""),
            ]
            hit = next((key for key in keys if key in master_lookup), None)

    found_master = None if hit is None else master_lookup[hit]

    if found_master:
        matched_master_units.add(found_master)
    else:
        unmatched_bbm_names.append(bbm_name)

# 4. FILTERING "MISSING IN BBM" LIST
# Master units that no BBM name resolved to.
all_master_units = set(master_jenis)
missing_in_bbm_all = all_master_units.difference(matched_master_units)

# Additional filter: keep a unit only if its type is neither a workshop
# ("BENGKEL") nor a trailer carriage ("KERETA TEMPEL") AND its name does not
# contain "DUMMY". The result is built already sorted.
excluded_jenis = ("BENGKEL", "KERETA TEMPEL")
missing_in_bbm_filtered = sorted(
    unit
    for unit in missing_in_bbm_all
    if "DUMMY" not in unit
    and not any(tag in master_jenis.get(unit, "") for tag in excluded_jenis)
)

# The unmatched BBM names are sorted in place.
unmatched_bbm_names.sort()

# 5. PRINT RESULTS
# Console summary of both difference lists. The second list is capped at
# preview_limit entries.
preview_limit = 50

print("\n")
print("HASIL ANALISA KELENGKAPAN UNIT (FILTERED)")

print(f"\n1. LIST NAMA DI BBM YANG TIDAK ADA DI MASTER ({len(unmatched_bbm_names)} Unit):")
print("(Kemungkinan Typo, Unit Baru, atau Aset Sewa)")
if unmatched_bbm_names:
    for i, name in enumerate(unmatched_bbm_names, 1):
        print(f"   {i}. {name}")
else:
    print("(Nihil - Semua unit BBM terdaftar di Master)")

print(f"\n2. LIST UNIT MASTER YANG TIDAK ADA DI BBM ({len(missing_in_bbm_filtered)} Unit):")
print("(Filter Aktif: Non-Bengkel, Non-Kereta Tempel, Non-Dummy)")
if missing_in_bbm_filtered:
    for i, name in enumerate(missing_in_bbm_filtered[:preview_limit], 1):
        jenis = master_jenis.get(name, "-")
        print(f"   {i}. {name} [{jenis}]")
    # Report the remainder only when entries were actually left out, so a list
    # of exactly preview_limit units does not end with "... dan 0 unit lainnya."
    remaining = len(missing_in_bbm_filtered) - preview_limit
    if remaining > 0:
        print(f"... dan {remaining} unit lainnya.")
else:
    print("(Nihil - Semua unit valid aktif mengisi BBM)")

# 6. EXPORT EXCEL
# Write both difference lists to one workbook, one sheet per list.
df_unmatched_bbm = pd.DataFrame(
    data=unmatched_bbm_names,
    columns=['Unit BBM Tidak Dikenal'],
)
df_missing_master = pd.DataFrame(
    data=missing_in_bbm_filtered,
    columns=['Unit Master Absen BBM (Filtered)'],
)

# (sheet name, frame) pairs in the order the sheets are written.
report_sheets = (
    ('Unknown_in_BBM', df_unmatched_bbm),
    ('Missing_in_BBM', df_missing_master),
)

with pd.ExcelWriter('Laporan_Perbedaan_Unit.xlsx') as writer:
    for sheet_name, frame in report_sheets:
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"\nDetail lengkap tersimpan di file: 'Laporan_Perbedaan_Unit.xlsx'")

Membaca Master Data: REPORT AB MAR 2025 (acuan nama alat berat).xlsx...
Master Data Loaded. 508 unique units indexed.

Membaca File BBM: BBM AAB.xlsx...
Ditemukan 331 nama unit unik di BBM.


HASIL ANALISA KELENGKAPAN UNIT (FILTERED)

1. LIST NAMA DI BBM YANG TIDAK ADA DI MASTER (1 Unit):
(Kemungkinan Typo, Unit Baru, atau Aset Sewa)
   1. MOBIL STOORING MEKANIK TRAILER

2. LIST UNIT MASTER YANG TIDAK ADA DI BBM (13 Unit):
(Filter Aktif: Non-Bengkel, Non-Kereta Tempel, Non-Dummy)
   1. CRIMPING 01 [MESIN LAIN-LAIN]
   2. FORKLIF MITS/KENANGA [FORKLIFT]
   3. HITACHI 80T/KUDANIL [CRANE]
   4. KOMPRESSOR BAN CAB TERNATE [MESIN LAIN-LAIN]
   5. L8002PT EX.L8365F [TRAILER]
   6. MOBIL OPERASIONAL CAB MANOKWARI [MESIN LAIN-LAIN]
   7. MOBIL STOORING 01 CAB JAKARTA [MESIN LAIN-LAIN]
   8. MOBIL STOORING 02 CAB JAKARTA [MESIN LAIN-LAIN]
   9. MOBIL STOORING MEKANIK CAB BERAU [MESIN LAIN-LAIN]
   10. MOBIL STOORING MEKANIK CAB MANOKWARI [MESIN LAIN-LAIN]
   11. MOBIL STOORING MEKANIK CAB SAMAR