In [9]:
import pandas as pd
import glob
import os
import re # Library untuk perbaikan teks
import warnings

warnings.filterwarnings('ignore')

# --- FUNGSI PEMBERSIH NAMA UNIT (AUTO-CORRECT) ---
def normalize_unit_name(name):
    """Return a canonical, upper-case spelling of an equipment unit name.

    Missing values (anything ``pd.isna`` reports as missing) become ``""``.
    Otherwise the value is stringified, upper-cased and trimmed; the
    separators ``/ - . _`` are turned into spaces; the misspelling
    ``FORKLIF`` is completed to ``FORKLIFT`` when it is followed by a space
    or ends the name; and runs of whitespace collapse to a single space.
    """
    if pd.isna(name):
        return ""

    # Separators become spaces first so the typo check below sees plain words.
    spaced = re.sub(r'[/\-._]', ' ', str(name).upper().strip())

    # Complete the typo in the middle of the name ...
    fixed = spaced.replace('FORKLIF ', 'FORKLIFT ')
    # ... and at the very end of it.
    if fixed.endswith('FORKLIF'):
        fixed += 'T'

    # split()/join squeezes repeated whitespace and trims both ends.
    return " ".join(fixed.split())

# ==============================================================================
# BAGIAN 1: LOAD DATA CABANG (DENGAN NORMALISASI)
# ==============================================================================
print("üöÄ MEMULAI PROSES LOAD DATA CABANG...")

# Branch reports are searched recursively under base_folder; when that folder
# does not exist, only the current working directory is searched.
base_folder = "Data Setiap Cabang"
if os.path.exists(base_folder):
    search_pattern_csv = os.path.join(base_folder, "**", "*Rpt HMU*.csv")
    search_pattern_xlsx = os.path.join(base_folder, "**", "*Rpt HMU*.xlsx")
    branch_files = (
        glob.glob(search_pattern_csv, recursive=True)
        + glob.glob(search_pattern_xlsx, recursive=True)
    )
else:
    branch_files = glob.glob("*Rpt HMU*.csv") + glob.glob("*Rpt HMU*.xlsx")

print(f"üìÇ Ditemukan Total {len(branch_files)} file cabang.")

list_ops = []          # one cleaned frame per readable file
unit_source_map = {}   # normalized unit name -> set of source file labels

for f in branch_files:
    try:
        basename = os.path.basename(f)
        # Label used for the source file: the basename without these suffixes.
        clean_filename = basename
        for suffix in (' - form2Strokes.csv', '.xlsx', '.csv'):
            clean_filename = clean_filename.replace(suffix, '')

        df = pd.read_csv(f) if f.endswith('.csv') else pd.read_excel(f, engine='openpyxl')
        df.columns = [str(c).strip().upper() for c in df.columns]

        # Files without the two mandatory columns are ignored.
        if not ('EQUIP NAME' in df.columns and 'HMU' in df.columns):
            continue

        # The first column whose header mentions TGL or DATE is the report date.
        date_cols = [c for c in df.columns if 'TGL' in c or 'DATE' in c]
        if not date_cols:
            continue
        target_date_col = date_cols[0]

        df_subset = df[['EQUIP NAME', target_date_col, 'HMU']].assign(
            Date=lambda d: pd.to_datetime(d[target_date_col], dayfirst=True, errors='coerce'),
            # Parsed as float; unparsable readings become NaN and are dropped below.
            HMU=lambda d: pd.to_numeric(d['HMU'], errors='coerce'),
            Source_File=clean_filename,
            Unit_Clean=lambda d: d['EQUIP NAME'].apply(normalize_unit_name),
        )

        # Keep only rows with a parsed date and reading that fall in 2025.
        df_subset = df_subset.dropna(subset=['Date', 'HMU'])
        df_subset = df_subset[df_subset['Date'].dt.year == 2025]
        if df_subset.empty:
            continue

        list_ops.append(df_subset[['Unit_Clean', 'Date', 'HMU', 'Source_File']])
        for u in df_subset['Unit_Clean'].unique():
            unit_source_map.setdefault(u, set()).add(clean_filename)

    except Exception as e:
        print(f"   ‚ùå Gagal membaca {basename}: {e}")

if not list_ops:
    print("‚ùå TIDAK ADA DATA CABANG YANG TERBACA.")
    ops_monthly = pd.DataFrame()
else:
    df_ops_all = pd.concat(list_ops, ignore_index=True)
    df_ops_all['Month'] = df_ops_all['Date'].dt.to_period('M')

    # Sort chronologically per unit so that 'last' below picks the reading
    # with the latest date inside each (unit, month) group.
    df_ops_all = df_ops_all.sort_values(['Unit_Clean', 'Date'], ascending=[True, True])

    ops_monthly = (
        df_ops_all
        .groupby(['Unit_Clean', 'Month'])
        .agg({'HMU': 'last', 'Date': 'last', 'Source_File': 'last'})
        .reset_index()
        .rename(columns={'HMU': 'HM_Ops_End_Month', 'Date': 'Tanggal_Laporan_Terakhir'})
    )

    del df_ops_all; del list_ops
    print(f"‚úÖ DATA CABANG SELESAI DIPROSES.")

    # Debug aid: show how the Kenanga unit is spelled after normalization.
    cek = [u for u in unit_source_map.keys() if 'KENANGA' in u]
    print(f"   [INFO] Unit Kenanga terdeteksi di Cabang sebagai: {cek}")

üöÄ MEMULAI PROSES LOAD DATA CABANG...
üìÇ Ditemukan Total 14 file cabang.


‚úÖ DATA CABANG SELESAI DIPROSES.
   [INFO] Unit Kenanga terdeteksi di Cabang sebagai: ['FORKLIFT MITS KENANGA']


In [10]:
import pandas as pd
import glob
import os
import re
import numpy as np # Pastikan numpy diimport

# ==============================================================================
# BAGIAN 2: LOAD DATA BBM (FIX BUG FILTER NAN & FFILL)
# ==============================================================================
print("üöÄ MEMULAI PROSES LOAD DATA BBM...")

# The workbook itself plus any per-sheet CSV exports named
# "BBM AAB.xlsx - <SHEET>.csv" in the current working directory.
bbm_files = glob.glob("BBM AAB.xlsx") + glob.glob("BBM AAB.xlsx - *.csv")
bbm_data = []              # one record per (unit, month): the last HM reading
units_bbm = set()          # every normalized unit name seen in the BBM data
unit_bbm_source_map = {}   # normalized unit name -> set of sheet names

# Sheet name (Indonesian month abbreviation) -> the 2025 month it covers.
month_map = {
    'JAN': '2025-01', 'FEB': '2025-02', 'MAR': '2025-03', 'APR': '2025-04',
    'MEI': '2025-05', 'JUN': '2025-06', 'JUL': '2025-07', 'AGT': '2025-08',
    'SEP': '2025-09', 'OKT': '2025-10', 'NOV': '2025-11', 'DES': '2025-12'
}

# Labels that can show up in the unit-name row but are not unit names.
# 'NAN' is deliberately NOT listed: as a substring it would also reject
# legitimate names such as 'KENANGA'. Exact 'NAN'/'NAT' is checked separately.
forbidden_substrings = ['UNNAMED', 'EQUIP NAME', 'TANGGAL', 'GROUP KPI', 'TOTAL']


# Must stay identical to the normalizer used for the branch data, otherwise
# unit names from the two sources will not join.
def normalize_unit_name(name):
    """Return a canonical, upper-case spelling of an equipment unit name.

    Missing values become ``""``. Otherwise the value is upper-cased and
    trimmed, the separators ``/ - . _`` become spaces, the typo ``FORKLIF``
    is completed to ``FORKLIFT`` (before a space or at the end of the name)
    and repeated whitespace is collapsed.
    """
    if pd.isna(name): return ""
    name = str(name).upper().strip()
    name = re.sub(r'[/\-._]', ' ', name)
    name = name.replace('FORKLIF ', 'FORKLIFT ')
    if name.endswith('FORKLIF'): name = name + 'T'
    name = " ".join(name.split())
    return name


for f in bbm_files:
    xls = None
    try:
        is_excel = f.endswith('.xlsx')
        if is_excel:
            xls = pd.ExcelFile(f)
            sheet_names = xls.sheet_names
        else:
            # CSV export: the sheet name is the part after the last ' - '.
            basename = os.path.basename(f)
            sheet_name_from_file = basename.split(' - ')[-1].replace('.csv', '').strip()
            sheet_names = [sheet_name_from_file]

        for sheet in sheet_names:
            # Only sheets named after a month are data sheets.
            if sheet not in month_map:
                continue
            period = pd.Period(month_map[sheet])

            if is_excel:
                df_raw = pd.read_excel(xls, sheet_name=sheet, header=None)
            else:
                df_raw = pd.read_csv(f, header=None)

            # Row 0 holds the unit names. Blank cells are turned into NaN and
            # then forward-filled, so every column under a merged header cell
            # inherits the unit name. Series.ffill() replaces the deprecated
            # fillna(method='ffill').
            row0 = df_raw.iloc[0].replace(r'^\s*$', np.nan, regex=True)
            row_units = row0.ffill().astype(str)

            # Row 2 holds the per-column labels; only 'HM' columns are read.
            row_headers = df_raw.iloc[2]
            hm_indices = [i for i, x in enumerate(row_headers) if str(x).strip().upper() == 'HM']

            # Column 0 from row 3 down holds the dates; it is shared by every
            # unit on the sheet, so it is parsed once per sheet.
            dates = pd.to_datetime(df_raw.iloc[3:, 0], dayfirst=True, errors='coerce')

            for idx in hm_indices:
                raw_name = str(row_units.iloc[idx]).strip().upper()

                # Reject header labels (substring match) and cells that are
                # exactly a stringified missing value (whole-word match).
                has_forbidden = any(sub in raw_name for sub in forbidden_substrings)
                is_nan_word = raw_name in ('NAN', 'NAT')
                if has_forbidden or is_nan_word:
                    continue

                clean_name = normalize_unit_name(raw_name)

                units_bbm.add(clean_name)
                unit_bbm_source_map.setdefault(clean_name, set()).add(sheet)

                hms = pd.to_numeric(df_raw.iloc[3:, idx], errors='coerce')
                df_temp = pd.DataFrame({'Date': dates, 'HM': hms})
                # Keep positive readings that have a parsed date.
                df_valid = df_temp[(df_temp['HM'] > 0) & (df_temp['Date'].notna())]

                if not df_valid.empty:
                    # The month-end value is the reading with the latest date.
                    last_hm = df_valid.sort_values('Date', ascending=True).iloc[-1]['HM']

                    bbm_data.append({
                        'Unit_Clean': clean_name,
                        'Month': period,
                        'HM_BBM_End_Month': last_hm
                    })

    except Exception as e:
        print(f"   ‚ö†Ô∏è Error membaca {f}: {e}")
    finally:
        # Release the workbook handle even when a sheet failed to parse.
        if xls is not None:
            xls.close()

if bbm_data:
    df_bbm_monthly = pd.DataFrame(bbm_data)
    print(f"‚úÖ DATA BBM SELESAI DIPROSES.")

    # Debug aid: show how the Kenanga unit is spelled after normalization.
    cek_kenanga = [u for u in units_bbm if 'KENANGA' in u]
    print(f"   [INFO] Unit Kenanga di BBM sekarang: {cek_kenanga}")
else:
    df_bbm_monthly = pd.DataFrame()
    print("‚ùå Data BBM Kosong.")

üöÄ MEMULAI PROSES LOAD DATA BBM...
‚úÖ DATA BBM SELESAI DIPROSES.
   [INFO] Unit Kenanga di BBM sekarang: ['FORKLIFT MITS KENANGA']


In [16]:
# ==============================================================================
# BAGIAN 3: ANALISA KONSISTENSI & EKSISTENSI
# ==============================================================================
print("üîç MENJALANKAN ANALISA KONSISTENSI...")

if ops_monthly.empty:
    df_consistency = pd.DataFrame()
else:
    # Count of monthly rows per unit (rows) and month (columns).
    ops_monthly['Month_Str'] = ops_monthly['Month'].astype(str)
    consistency_matrix = pd.crosstab(ops_monthly['Unit_Clean'], ops_monthly['Month_Str'])

    # Force exactly the twelve 2025 months, in order; absent months count 0.
    all_months_2025 = [f"2025-{str(i).zfill(2)}" for i in range(1, 13)]
    consistency_matrix = consistency_matrix.reindex(columns=all_months_2025, fill_value=0)

    consistency_results = []
    for unit, row in consistency_matrix.iterrows():
        # Months for which this unit has no monthly row.
        missing_months = row[row == 0].index.tolist()
        n_missing = len(missing_months)

        if n_missing == 0:
            status = "LENGKAP"
        elif n_missing == 12:
            status = "TIDAK ADA DATA"
        else:
            status = "TIDAK LENGKAP"

        consistency_results.append({
            'Unit_Clean': unit,
            'Status_Kelengkapan': status,
            'Bulan_Yang_Hilang': ", ".join(missing_months)
        })
    df_consistency = pd.DataFrame(consistency_results)

# Units that exist in only one of the two sources.
all_branch_units = set(unit_source_map.keys())
all_bbm_units = units_bbm
branch_only_units = sorted(all_branch_units - all_bbm_units)
bbm_only_units = sorted(all_bbm_units - all_branch_units)

branch_only_data = [
    {'Unit': u, 'Found_In_File': ", ".join(sorted(unit_source_map[u]))}
    for u in branch_only_units
]
bbm_only_data = [
    {'Unit': u, 'Found_In_Month': ", ".join(sorted(unit_bbm_source_map[u]))}
    for u in bbm_only_units
]

print(f"   -> Unit Cabang Only: {len(branch_only_units)}")
print(f"   -> Unit BBM Only: {len(bbm_only_units)}")

üîç MENJALANKAN ANALISA KONSISTENSI...
   -> Unit Cabang Only: 2
   -> Unit BBM Only: 59


In [17]:
# ==============================================================================
# BAGIAN 4: ANALISA HM & EXPORT (FINAL)
# ==============================================================================
print("\nüîç MENJALANKAN ANALISA 2: Perbandingan HM Bulanan...")

if not ops_monthly.empty and not df_bbm_monthly.empty:
    # Outer join keeps (unit, month) pairs present in only one source.
    df_compare = pd.merge(ops_monthly, df_bbm_monthly, on=['Unit_Clean', 'Month'], how='outer')

    # Round both readings to nullable integers FIRST and derive the difference
    # from the rounded values. The reported 'Selisih' then always equals
    # Ops - BBM as shown, and agrees with 'Status', which is computed from the
    # same rounded readings. Int64 keeps missing values as <NA>.
    for col in ('HM_Ops_End_Month', 'HM_BBM_End_Month'):
        df_compare[col] = df_compare[col].round(0).astype('Int64')
    df_compare['Selisih'] = df_compare['HM_Ops_End_Month'] - df_compare['HM_BBM_End_Month']

    def determine_status(row):
        """Classify one comparison row by which readings exist and their order.

        Returns "DATA ERROR" when both readings are missing, "DATA CABANG
        KOSONG" / "DATA BBM KOSONG" when one is missing, otherwise "MATCH",
        "WAJAR (Ops > BBM)" or "ANOMALI (BBM > Ops)".
        """
        hm_ops = row['HM_Ops_End_Month']
        hm_bbm = row['HM_BBM_End_Month']

        if pd.isna(hm_ops) and pd.isna(hm_bbm): return "DATA ERROR"
        if pd.isna(hm_ops): return "DATA CABANG KOSONG"
        if pd.isna(hm_bbm): return "DATA BBM KOSONG"

        selisih = hm_ops - hm_bbm
        if selisih == 0: return "MATCH"
        if selisih > 0: return "WAJAR (Ops > BBM)"
        return "ANOMALI (BBM > Ops)"

    df_compare['Status'] = df_compare.apply(determine_status, axis=1)

    # Assign the filled column back instead of calling
    # df[col].fillna(..., inplace=True): that form is chained assignment and
    # does not update the frame under pandas Copy-on-Write.
    if 'Source_File' in df_compare.columns:
        df_compare['Source_File'] = df_compare['Source_File'].fillna('-')
        df_compare = df_compare.rename(columns={'Source_File': 'Lokasi_File_Cabang'})

    if 'Tanggal_Laporan_Terakhir' in df_compare.columns:
        df_compare['Tanggal_Laporan_Terakhir'] = (
            df_compare['Tanggal_Laporan_Terakhir'].dt.strftime('%Y-%m-%d').fillna('-')
        )

    df_compare = df_compare.sort_values(['Unit_Clean', 'Month'])
    anomalies = df_compare[df_compare['Status'].str.contains("ANOMALI", na=False)].copy()

    print(f"   -> Total Data: {len(df_compare)}")
    print(f"   -> Anomali: {len(anomalies)}")

else:
    df_compare = pd.DataFrame(); anomalies = pd.DataFrame()

# Each sheet is written only when there is something to put on it.
output_file = 'Analisa_HM_Setiap_Cabang2.xlsx'
try:
    with pd.ExcelWriter(output_file) as writer:
        if not df_consistency.empty: df_consistency.to_excel(writer, sheet_name='Analisa_Konsistensi_Cabang', index=False)
        if branch_only_data: pd.DataFrame(branch_only_data).to_excel(writer, sheet_name='Missing_In_BBM', index=False)
        if bbm_only_data: pd.DataFrame(bbm_only_data).to_excel(writer, sheet_name='Missing_In_Branch', index=False)
        if not df_compare.empty:
            # Fixed column order; columns that do not exist are left out.
            cols = ['Unit_Clean', 'Month', 'HM_Ops_End_Month', 'HM_BBM_End_Month', 'Selisih', 'Status', 'Tanggal_Laporan_Terakhir', 'Lokasi_File_Cabang']
            final_cols = [c for c in cols if c in df_compare.columns]
            df_compare[final_cols].to_excel(writer, sheet_name='All_HM_Comparison', index=False)
        if not anomalies.empty: anomalies.to_excel(writer, sheet_name='HM_Anomalies_Alert', index=False)

    print(f"\nüíæ  SUKSES! File tersimpan: '{output_file}'")
    print("    Unit FORKLIFT KENANGA sekarang seharusnya sudah MATCH.")
except Exception as e:
    print(f"\n‚ö†Ô∏è  Gagal menyimpan: {e}")


üîç MENJALANKAN ANALISA 2: Perbandingan HM Bulanan...
   -> Total Data: 2908
   -> Anomali: 0

üíæ  SUKSES! File tersimpan: 'Analisa_HM_Setiap_Cabang2.xlsx'
    Unit FORKLIFT KENANGA sekarang seharusnya sudah MATCH.


## PENGECEKAN APAKAH ALAT BERAT DI SBY & SDA ADA DI JKT

In [18]:
import pandas as pd
import glob
import os
import re
import gc # Garbage Collector untuk bersih-bersih RAM

# ==============================================================================
# BAGIAN 1: SETUP & LOAD DATA SUMBER (SBY & SDA)
# ==============================================================================
print("üöÄ [1/3] MEMBACA DATA SUMBER (SBY & SDA)...")


# --- Unit-name normalization ---
def normalize_unit_name(name):
    """Return a canonical, upper-case spelling of an equipment unit name.

    Missing values become ``""``. Otherwise the value is upper-cased and
    trimmed, the separators ``/ - . _`` become spaces, the typo ``FORKLIF``
    is completed to ``FORKLIFT`` (before a space or at the end of the name)
    and repeated whitespace is collapsed.
    """
    if pd.isna(name): return ""
    name = str(name).upper().strip()
    name = re.sub(r'[/\-._]', ' ', name)
    name = name.replace('FORKLIF ', 'FORKLIFT ')
    if name.endswith('FORKLIF'): name = name + 'T'
    name = " ".join(name.split())
    return name


def _collect_unit_names(file_list):
    """Return the set of normalized 'EQUIP NAME' values found in the files.

    Paths ending in '.csv' are read with ``read_csv`` (only columns whose
    header contains 'EQUIP'); every other path is read as an Excel workbook.
    A file that cannot be read or processed is reported and skipped, so one
    bad file does not abort the scan.
    """
    units = set()
    for f in file_list:
        try:
            if f.endswith('.csv'):
                df = pd.read_csv(f, usecols=lambda x: 'EQUIP' in x.upper())
            else:
                df = pd.read_excel(f, engine='openpyxl')

            df.columns = [str(c).strip().upper() for c in df.columns]
            if 'EQUIP NAME' in df.columns:
                units.update(normalize_unit_name(n) for n in df['EQUIP NAME'].dropna().unique())

            # Drop the frame right away; only the names are needed.
            del df; gc.collect()
        except Exception as exc:
            print(f"   [SKIP] {os.path.basename(f)}: {exc}")
    return units


# --- File configuration ---
base_folder = "Data Setiap Cabang"
# Recursive search: Excel files first, then CSV files.
files_sby = glob.glob(os.path.join(base_folder, "**", "*SBY Rpt HMU*.xlsx"), recursive=True) + \
            glob.glob(os.path.join(base_folder, "**", "*SBY Rpt HMU*.csv"), recursive=True)

files_sda = glob.glob(os.path.join(base_folder, "**", "*SDA Rpt HMU*.xlsx"), recursive=True) + \
            glob.glob(os.path.join(base_folder, "**", "*SDA Rpt HMU*.csv"), recursive=True)

print(f"üìÇ File SBY ditemukan: {len(files_sby)}")
print(f"üìÇ File SDA ditemukan: {len(files_sda)}")

# --- Load SBY units ---
sby_units_source = _collect_unit_names(files_sby)
print(f"   -> Unit Unik SBY (Source): {len(sby_units_source)}")

# --- Load SDA units ---
sda_units_source = _collect_unit_names(files_sda)
print(f"   -> Unit Unik SDA (Source): {len(sda_units_source)}")

üöÄ [1/3] MEMBACA DATA SUMBER (SBY & SDA)...
üìÇ File SBY ditemukan: 2
üìÇ File SDA ditemukan: 1
   -> Unit Unik SBY (Source): 110
   -> Unit Unik SDA (Source): 9


In [19]:
# ==============================================================================
# BAGIAN 2: LOAD DATA INDUK JKT (CHECK PORT)
# ==============================================================================
print("üöÄ [2/3] MEMERIKSA FILE INDUK (JKT)...")

files_jkt = (
    glob.glob(os.path.join(base_folder, "**", "*JKT Rpt HMU*.xlsx"), recursive=True)
    + glob.glob(os.path.join(base_folder, "**", "*JKT Rpt HMU*.csv"), recursive=True)
)

# Normalized unit names that the JKT files list under each port.
jkt_port_sby = set()
jkt_port_sda = set()

for f in files_jkt:
    try:
        # For CSV only the two columns needed below are loaded.
        if f.endswith('.csv'):
            df = pd.read_csv(f, usecols=lambda x: x.upper().strip() in ['EQUIP NAME', 'PORT'])
        else:
            df = pd.read_excel(f, engine='openpyxl')

        df.columns = [str(c).strip().upper() for c in df.columns]

        if all(col in df.columns for col in ('EQUIP NAME', 'PORT')):
            unit_clean = df['EQUIP NAME'].apply(normalize_unit_name)
            port_clean = df['PORT'].astype(str).str.upper().str.strip()

            # Units the JKT file assigns to each port.
            jkt_port_sby.update(unit_clean[port_clean == 'SBY'].unique())
            jkt_port_sda.update(unit_clean[port_clean == 'SDA'].unique())

        del df; gc.collect()

    except Exception:
        # Best effort: a file that fails is named and skipped.
        print(f"   ‚ö†Ô∏è Skip {os.path.basename(f)}")

print(f"   -> Di File JKT, ditemukan {len(jkt_port_sby)} unit PORT SBY.")
print(f"   -> Di File JKT, ditemukan {len(jkt_port_sda)} unit PORT SDA.")

üöÄ [2/3] MEMERIKSA FILE INDUK (JKT)...
   ‚ö†Ô∏è Skip ~$JKT Rpt HMU 01-02-2025-28-02-2025.xlsx
   -> Di File JKT, ditemukan 112 unit PORT SBY.
   -> Di File JKT, ditemukan 9 unit PORT SDA.


In [20]:
# ==============================================================================
# BAGIAN 3: HASIL CROSS-CHECK
# ==============================================================================
print("\nüöÄ [3/3] HASIL ANALISA KELENGKAPAN DATA:")

# --- SBY check: branch units that the JKT files do not list under port SBY ---
print(f"\n--- [CABANG SURABAYA] ---")
missing_sby = sby_units_source - jkt_port_sby

if not sby_units_source:
    print("‚ö†Ô∏è  Data Source SBY Kosong.")
elif missing_sby:
    print(f"‚ùå TIDAK LENGKAP! Ada {len(missing_sby)} unit SBY yang belum masuk file JKT.")
    print(f"   Daftar Unit Hilang: {sorted(list(missing_sby))}")
else:
    print("‚úÖ VALID! Semua unit di file SBY sudah masuk ke file JKT.")

# --- SDA check: branch units that the JKT files do not list under port SDA ---
print(f"\n--- [CABANG SIDOARJO] ---")
missing_sda = sda_units_source - jkt_port_sda

if not sda_units_source:
    print("‚ö†Ô∏è  Data Source SDA Kosong.")
elif missing_sda:
    print(f"‚ùå TIDAK LENGKAP! Ada {len(missing_sda)} unit SDA yang belum masuk file JKT.")
    print(f"   Daftar Unit Hilang: {sorted(list(missing_sda))}")
else:
    print("‚úÖ VALID! Semua unit di file SDA sudah masuk ke file JKT.")


üöÄ [3/3] HASIL ANALISA KELENGKAPAN DATA:

--- [CABANG SURABAYA] ---
‚úÖ VALID! Semua unit di file SBY sudah masuk ke file JKT.

--- [CABANG SIDOARJO] ---
‚úÖ VALID! Semua unit di file SDA sudah masuk ke file JKT.


In [24]:
import pandas as pd
import glob
import os
import re
import gc

# ==============================================================================
# BAGIAN 4: DETAIL PENGECEKAN DENGAN LOKASI FILE
# ==============================================================================
print("üöÄ [4/4] MENJALANKAN AUDIT DETAIL (UNIT + LOKASI FILE)...")

# --- Unit-name normalization ---
def normalize_unit_name(name):
    """Canonicalize an equipment unit name so spelling variants compare equal.

    Returns ``""`` for a missing value. Otherwise: upper-case and trim, turn
    ``/ - . _`` into spaces, complete the typo ``FORKLIF`` to ``FORKLIFT``
    (before a space or at the end of the name), collapse repeated whitespace.
    """
    if pd.isna(name):
        return ""
    text = re.sub(r'[/\-._]', ' ', str(name).upper().strip()).replace('FORKLIF ', 'FORKLIFT ')
    # A trailing 'FORKLIF' has no following space, so it gets its 'T' here.
    suffix = 'T' if text.endswith('FORKLIF') else ''
    return " ".join((text + suffix).split())

# --- Fungsi Scanning Cerdas (Menyimpan Nama File) ---
def scan_units_with_files(file_list, target_port=None):
    """Map each normalized unit name to the report files that mention it.

    Args:
        file_list: Paths of report files. Paths ending in '.csv' are read with
            ``read_csv``; every other path is read as an Excel workbook.
        target_port: Optional port code. When given, only rows whose PORT
            value equals it are counted (both sides compared upper-cased and
            stripped), and a file without a PORT column contributes nothing,
            because its rows cannot be attributed to any port.

    Returns:
        dict: ``{normalized unit name: {file basename, ...}}``. Names that
        normalize to an empty string are left out.

    A file that cannot be read or processed is reported with a printed
    message and skipped; it never aborts the scan.
    """
    wanted_cols = ['EQUIP NAME', 'PORT'] if target_port else ['EQUIP NAME']
    port_key = str(target_port).upper().strip() if target_port else None
    unit_map = {}  # {'UNIT NAME': {'file1', 'file2'}}

    for f in file_list:
        filename = os.path.basename(str(f))
        try:
            # For CSV only the needed columns are loaded.
            if f.endswith('.csv'):
                df = pd.read_csv(f, usecols=lambda x: x.upper().strip() in wanted_cols)
            else:
                df = pd.read_excel(f, engine='openpyxl')

            df.columns = [str(c).strip().upper() for c in df.columns]

            if 'EQUIP NAME' in df.columns:
                if port_key is None:
                    rows = df
                elif 'PORT' in df.columns:
                    rows = df[df['PORT'].astype(str).str.upper().str.strip() == port_key]
                else:
                    # Without a PORT column no row can be tied to the requested
                    # port; counting them all would produce false matches.
                    print(f"   [SKIP] {filename}: kolom PORT tidak ditemukan")
                    rows = df.iloc[0:0]

                for name in rows['EQUIP NAME'].dropna().unique():
                    clean_name = normalize_unit_name(name)
                    if clean_name:
                        unit_map.setdefault(clean_name, set()).add(filename)

            del df; gc.collect()
        except Exception as exc:
            print(f"   [SKIP] {filename}: {exc}")

    return unit_map

# --- 1. FILE SETUP ---
base_folder = "Data Setiap Cabang"


def _find_reports(pattern):
    """Return every file under base_folder (any depth) matching the pattern."""
    return glob.glob(os.path.join(base_folder, "**", pattern), recursive=True)


# Excel matches are listed before CSV matches for each branch.
files_sby = _find_reports("*SBY Rpt HMU*.xlsx") + _find_reports("*SBY Rpt HMU*.csv")
files_sda = _find_reports("*SDA Rpt HMU*.xlsx") + _find_reports("*SDA Rpt HMU*.csv")
files_jkt = _find_reports("*JKT Rpt HMU*.xlsx") + _find_reports("*JKT Rpt HMU*.csv")

# --- 2. SCANNING ---
# Branch files are scanned as-is; the JKT files are scanned once per port.
print("   -> Scanning File SBY...")
map_sby_source = scan_units_with_files(files_sby)

print("   -> Scanning File SDA...")
map_sda_source = scan_units_with_files(files_sda)

print("   -> Scanning File JKT (Filter Port SBY)...")
map_jkt_sby = scan_units_with_files(files_jkt, target_port='SBY')

print("   -> Scanning File JKT (Filter Port SDA)...")
map_jkt_sda = scan_units_with_files(files_jkt, target_port='SDA')


# --- 3. LAPORAN DETAIL ---
def print_detail_report(title, unit_set, source_map):
    """Print a titled, numbered list of units with the files they came from.

    Args:
        title: Heading text; printed together with the number of units.
        unit_set: Units to list. They are printed in sorted order.
        source_map: Mapping of unit -> collection of file names. A unit that
            is not in the mapping is shown with '-' as its location.
    """
    print(f"\nüëâ {title} [Total: {len(unit_set)}]:")

    # Nothing to list: say so and stop.
    if not unit_set:
        print("   ‚úÖ NIHIL (Data Sinkron)")
        return

    for i, u in enumerate(sorted(unit_set), start=1):
        locations = source_map.get(u, {'-'})
        files = ", ".join(sorted(locations))
        print(f"   {i}. {u}")
        print(f"      Lokasi: {files}")

# === SURABAYA (SBY) REPORT ===
print("\n" + "="*60)
print("   AUDIT DETAIL: SURABAYA (SBY)")
print("="*60)

# A. Listed in the branch files but not in the JKT files (port SBY)
missing_in_jkt_sby = map_sby_source.keys() - map_jkt_sby.keys()
print_detail_report("Ada di File SBY, tapi TIDAK ADA di File JKT", missing_in_jkt_sby, map_sby_source)

# B. Listed in the JKT files (port SBY) but not in the branch files
extra_in_jkt_sby = map_jkt_sby.keys() - map_sby_source.keys()
print_detail_report("Ada di File JKT (Port SBY), tapi TIDAK ADA di File SBY", extra_in_jkt_sby, map_jkt_sby)


# === SIDOARJO (SDA) REPORT ===
print("\n" + "="*60)
print("   AUDIT DETAIL: SIDOARJO (SDA)")
print("="*60)

# A. Listed in the branch files but not in the JKT files (port SDA)
missing_in_jkt_sda = map_sda_source.keys() - map_jkt_sda.keys()
print_detail_report("Ada di File SDA, tapi TIDAK ADA di File JKT", missing_in_jkt_sda, map_sda_source)

# B. Listed in the JKT files (port SDA) but not in the branch files
extra_in_jkt_sda = map_jkt_sda.keys() - map_sda_source.keys()
print_detail_report("Ada di File JKT (Port SDA), tapi TIDAK ADA di File SDA", extra_in_jkt_sda, map_jkt_sda)

üöÄ [4/4] MENJALANKAN AUDIT DETAIL (UNIT + LOKASI FILE)...
   -> Scanning File SBY...
   -> Scanning File SDA...
   -> Scanning File JKT (Filter Port SBY)...
   -> Scanning File JKT (Filter Port SDA)...

   AUDIT DETAIL: SURABAYA (SBY)

üëâ Ada di File SBY, tapi TIDAK ADA di File JKT [Total: 0]:
   ‚úÖ NIHIL (Data Sinkron)

üëâ Ada di File JKT (Port SBY), tapi TIDAK ADA di File SBY [Total: 2]:
   1. SIDE LOUDER BOSS
      Lokasi: JKT Rpt HMU 01-01-2025-31-01-2025.xlsx, JKT Rpt HMU 01-02-2025-28-02-2025.xlsx, JKT Rpt HMU 01-03-2025-31-03-2025.xlsx, JKT Rpt HMU 01-04-2025-30-04-2025.xlsx, JKT Rpt HMU 01-05-2025-31-05-2025.xlsx, JKT Rpt HMU 01-06-2025-30-06-2025.xlsx, JKT Rpt HMU 01-07-2025-31-07-2025.xlsx, JKT Rpt HMU 01-08-2025-31-08-2025.xlsx, JKT Rpt HMU 01-09-2025-30-09-2025.xlsx
   2. TOP LOADER MITS CENDRAWASIH
      Lokasi: JKT Rpt HMU 01-01-2025-31-01-2025.xlsx, JKT Rpt HMU 01-02-2025-28-02-2025.xlsx, JKT Rpt HMU 01-03-2025-31-03-2025.xlsx, JKT Rpt HMU 01-04-2025-30-04-2025.xl

## ANALISA BENCHMARK ALAT BERAT PER CABANG

In [None]:
import pandas as pd
import glob
import os
import re
import gc
import warnings

warnings.filterwarnings('ignore')

# ==============================================================================
# BAGIAN 1: MEMBANGUN MASTER DATA (PORT & JENIS ALAT)
# ==============================================================================
print("üöÄ [1/4] MEMBANGUN MASTER DATA (PORT & JENIS ALAT)...")

# --- Fungsi Normalisasi ---
def normalize_unit_name(name):
    """Return a canonical, upper-case unit name used as the join key between files.

    Steps, in order:
      1. Missing values (as judged by ``pd.isna``) become the empty string.
      2. The value is stringified, upper-cased and stripped.
      3. The separators ``/ - . _`` are each replaced by a space.
      4. The typo ``FORKLIF`` is completed to ``FORKLIFT`` when it is followed
         by a space or is the very end of the name.
      5. Runs of whitespace are collapsed to a single space.
    """
    if pd.isna(name):
        return ""

    text = re.sub(r'[/\-._]', ' ', str(name).upper().strip())

    # Typo repair: mid-name occurrence first, then the trailing occurrence.
    text = text.replace('FORKLIF ', 'FORKLIFT ')
    if text.endswith('FORKLIF'):
        text += 'T'

    return " ".join(text.split())

# --- Load branch data ---
base_folder = "Data Setiap Cabang"

# The only columns (after upper-casing and stripping) needed to build the master map.
MASTER_COLUMNS = ('EQUIP NAME', 'PORT', 'JENIS ALAT')


def _is_master_column(col):
    """Return True when a raw header matches one of MASTER_COLUMNS.

    ``str()`` is applied first because Excel headers are not guaranteed to be
    strings (a numeric or date header would otherwise raise on ``.upper()``).
    """
    return str(col).upper().strip() in MASTER_COLUMNS


# Recursive patterns so files in every sub-folder are found.
# Each glob result is sorted because the map below is "last file wins" and raw
# glob order depends on the OS; the xlsx-then-csv precedence is kept as before.
branch_files = (
    sorted(glob.glob(os.path.join(base_folder, "**", "*Rpt HMU*.xlsx"), recursive=True))
    + sorted(glob.glob(os.path.join(base_folder, "**", "*Rpt HMU*.csv"), recursive=True))
)

# Dictionary: {'UNIT_NAME': {'Port': 'SBY', 'Jenis_Alat': 'FORKLIFT'}}
master_unit_map = {}
# Files that could not be read; reported instead of being silently ignored.
skipped_files = []

for f in branch_files:
    try:
        # Save RAM: keep only the columns needed, for both file formats.
        if f.lower().endswith('.csv'):
            df = pd.read_csv(f, usecols=_is_master_column)
        else:
            df = pd.read_excel(f, engine='openpyxl', usecols=_is_master_column)

        df.columns = [str(c).strip().upper() for c in df.columns]

        if 'EQUIP NAME' in df.columns and 'PORT' in df.columns:
            # Make sure the equipment-type column exists; default when absent.
            if 'JENIS ALAT' not in df.columns:
                df['JENIS ALAT'] = 'UNKNOWN'

            # Unique (name, port, type) rows that have both a name and a port.
            df_unique = (
                df[['EQUIP NAME', 'PORT', 'JENIS ALAT']]
                .dropna(subset=['EQUIP NAME', 'PORT'])
                .drop_duplicates()
            )

            for raw_name, raw_port, raw_jenis in zip(
                df_unique['EQUIP NAME'], df_unique['PORT'], df_unique['JENIS ALAT']
            ):
                clean_name = normalize_unit_name(raw_name)
                if not clean_name:
                    continue

                port = str(raw_port).upper().strip()

                # A blank type cell must not become the literal string 'NAN'.
                if pd.isna(raw_jenis):
                    jenis = 'UNKNOWN'
                else:
                    jenis = str(raw_jenis).upper().strip() or 'UNKNOWN'

                # A missing type is not new information: keep a type learned earlier.
                if jenis == 'UNKNOWN' and clean_name in master_unit_map:
                    jenis = master_unit_map[clean_name]['Jenis_Alat']

                # Later files take priority for everything else (latest data wins).
                master_unit_map[clean_name] = {'Port': port, 'Jenis_Alat': jenis}

        del df; gc.collect()

    except Exception as e:
        # Best effort: one unreadable file must not stop the run, but it is reported
        # so that a shrinking master map can be traced back to its cause.
        skipped_files.append(f)
        print(f"   -> Lewati file (gagal dibaca): {os.path.basename(f)} | {e}")

print(f"‚úÖ Master Data Siap. Teridentifikasi {len(master_unit_map)} unit dengan PORT yang jelas.")

üöÄ [1/3] MEMBANGUN MASTER DATA (PORT & JENIS ALAT)...
‚úÖ Master Data Siap. Teridentifikasi 245 unit dengan PORT yang jelas.


In [None]:
# ==============================================================================
# BAGIAN 2: MENGHITUNG KONSUMSI BBM & HM (LOGIKA BARU: DELTA HARIAN)
# ==============================================================================
print("üöÄ [2/4] MENGHITUNG FUEL RATIO (METODE DELTA HARIAN)...")

# Menggunakan File Utama XLSX (Sesuai Request Terakhir)
filename_excel = 'BBM AAB.xlsx'
target_sheets = ['JAN', 'FEB', 'MAR', 'APR', 'MEI', 'JUN', 'JUL', 'AGT', 'SEP', 'OKT', 'NOV']

benchmark_data = [] # List penampung hasil akhir per unit per bulan
raw_daily_list = [] # List penampung data harian sementara

if os.path.exists(filename_excel):
    try:
        xls = pd.ExcelFile(filename_excel)
        print(f"   -> File BBM ditemukan. Memproses Sheet JAN-NOV...")
        
        for sheet in target_sheets:
            if sheet in xls.sheet_names:
                # print(f"      Reading: {sheet}...") 
                
                # Baca Sheet Tanpa Header
                df = pd.read_excel(xls, sheet_name=sheet, header=None)
                
                # --- PARSING STRUKTUR (Unit Baris 0, Metric Baris 2) ---
                unit_names = df.iloc[0].ffill()
                headers = df.iloc[2]
                dates = df.iloc[3:, 0] # Tanggal Kolom A
                
                # Loop Kolom Data
                for col in range(1, df.shape[1]):
                    header_str = str(headers[col]).strip().upper()
                    
                    if header_str in ['HM', 'LITER', 'KELUAR', 'PEMAKAIAN']:
                        metric_type = 'HM' if header_str == 'HM' else 'LITER'
                        
                        unit_raw = str(unit_names[col])
                        unit_clean = normalize_unit_name(unit_raw)
                        
                        # HANYA PROSES JIKA UNIT ADA DI MASTER MAP (Punya PORT)
                        if unit_clean in master_unit_map:
                            vals = pd.to_numeric(df.iloc[3:, col], errors='coerce')
                            
                            temp_df = pd.DataFrame({
                                'Date': dates,
                                'Month': sheet, # Simpan nama bulan untuk grouping nanti
                                'Unit_Name': unit_clean,
                                'Metric': metric_type,
                                'Value': vals
                            })
                            
                            temp_df.dropna(subset=['Value', 'Date'], inplace=True)
                            if not temp_df.empty:
                                raw_daily_list.append(temp_df)
                
                del df; gc.collect()

    except Exception as e:
        print(f"   ‚ùå Error membaca Excel: {e}")
else:
    print(f"   ‚ùå File '{filename_excel}' TIDAK DITEMUKAN.")

# --- PROSES PERHITUNGAN DELTA & AGREGASI ---
if raw_daily_list:
    print("   -> Mengolah Data Harian (Pivot & Delta)...")
    df_all = pd.concat(raw_daily_list, ignore_index=True)
    
    # 1. Konversi Tanggal
    df_all['Date'] = pd.to_datetime(df_all['Date'], dayfirst=True, errors='coerce')
    df_all.dropna(subset=['Date'], inplace=True)
    
    # 2. Pivot Table Harian (Unit + Tanggal + Bulan)
    # Kita sertakan 'Month' di index agar nanti mudah di-grouping kembali
    # (Asumsi: Tanggal di Excel konsisten dengan Bulan Sheetnya)
    df_pivot = df_all.pivot_table(
        index=['Unit_Name', 'Month', 'Date'], 
        columns='Metric',
        values='Value',
        aggfunc='sum'
    ).reset_index()
    
    if 'HM' not in df_pivot.columns: df_pivot['HM'] = 0
    if 'LITER' not in df_pivot.columns: df_pivot['LITER'] = 0
    
    # 3. Sortir Kronologis (WAJIB UNTUK DELTA)
    df_pivot.sort_values(by=['Unit_Name', 'Date'], inplace=True)
    
    # 4. HITUNG DELTA HM (Hari Ini - Kemarin)
    df_pivot['Delta_HM'] = df_pivot.groupby('Unit_Name')['HM'].diff()
    
    # --- CLEANING RULES (SESUAI LOGIKA TERAKHIR) ---
    # a. Reset negatif jadi 0
    df_pivot.loc[df_pivot['Delta_HM'] < 0, 'Delta_HM'] = 0
    # b. Hari pertama NaN jadi 0
    df_pivot['Delta_HM'] = df_pivot['Delta_HM'].fillna(0)
    # c. FILTER > 24 JAM DIHAPUS (Sesuai request agar Landak & Boss 3 terhitung)
    # df_pivot.loc[df_pivot['Delta_HM'] > 24, 'Delta_HM'] = 0 
    
    # 5. AGREGASI PER BULAN (Untuk masuk ke benchmark_data)
    # Group by Unit & Month -> Sum Liter & Sum Delta HM
    df_monthly_agg = df_pivot.groupby(['Unit_Name', 'Month']).agg({
        'LITER': 'sum',
        'Delta_HM': 'sum'
    }).reset_index()
    
    # 6. MAPPING KEMBALI PORT & JENIS ALAT
    for _, row in df_monthly_agg.iterrows():
        unit = row['Unit_Name']
        # Pastikan unit ada di map (seharusnya ada karena sudah difilter di awal)
        if unit in master_unit_map:
            info = master_unit_map[unit]
            
            total_liter = row['LITER']
            total_hm = row['Delta_HM']
            
            # Hanya masukkan jika ada data valid
            if total_liter > 0 or total_hm > 0:
                benchmark_data.append({
                    'Unit_Name': unit,
                    'Month': row['Month'], # JAN, FEB, dst
                    'Port': info['Port'],
                    'Jenis_Alat': info['Jenis_Alat'],
                    'Total_Liter': total_liter,
                    'Total_HM_Work': total_hm
                })

print(f"‚úÖ Perhitungan Selesai. Terkumpul {len(benchmark_data)} record bulanan valid.")

üöÄ [2/3] MENGHITUNG FUEL RATIO (METODE DELTA HARIAN)...
   -> File BBM ditemukan. Memproses Sheet JAN-NOV...
   -> Mengolah Data Harian (Pivot & Delta)...
‚úÖ Perhitungan Selesai. Terkumpul 2525 record bulanan valid.


In [None]:
# ==============================================================================
# BAGIAN 3: AGREGASI BENCHMARK & EXPORT (DENGAN DAFTAR UNIT)
# ==============================================================================
print("üöÄ [3/4] MEMBUAT LAPORAN BENCHMARK DENGAN DETAIL POPULASI...")

if 'benchmark_data' in locals() and benchmark_data:
    df_bm = pd.DataFrame(benchmark_data)
    
    # Fungsi untuk menggabungkan nama unit jadi satu string
    def list_units(x):
        return ", ".join(sorted(x.unique()))
    
    # 1. GROUPING & AGGREGATION
    # Menggunakan Named Aggregation agar nama kolom langsung rapi
    df_summary = df_bm.groupby(['Port', 'Jenis_Alat']).agg(
        Populasi_Unit=('Unit_Name', 'nunique'),        # Hitung Jumlah Unit
        Daftar_Unit=('Unit_Name', list_units),         # <--- KOLOM BARU: List Nama Unit
        Total_Liter=('Total_Liter', 'sum'),            # Total BBM
        Total_HM_Work=('Total_HM_Work', 'sum')         # Total Jam Kerja
    ).reset_index()
    
    # 2. HITUNG RATIO (LITER / JAM)
    df_summary['Avg_Fuel_Ratio'] = df_summary['Total_Liter'] / df_summary['Total_HM_Work']
    
    # Handle division by zero & infinity
    df_summary['Avg_Fuel_Ratio'] = df_summary['Avg_Fuel_Ratio'].fillna(0).replace([float('inf'), -float('inf')], 0)
    
    # Formatting
    df_summary['Avg_Fuel_Ratio'] = df_summary['Avg_Fuel_Ratio'].round(2)
    df_summary.sort_values(['Port', 'Jenis_Alat'], inplace=True)
    
    
    # 3. EXPORT KE EXCEL
    output_file = 'Analisa_Benchmark_Per_Cabang.xlsx'
    try:
        with pd.ExcelWriter(output_file) as writer:
            # Sheet 1: Summary Benchmark (Sekarang ada kolom Daftar_Unit)
            df_summary.to_excel(writer, sheet_name='Benchmark_Summary', index=False)
            
            # Sheet 2: Detail Data (Untuk pengecekan per bulan)
            df_bm.sort_values(['Port', 'Jenis_Alat', 'Month'], inplace=True)
            df_bm.to_excel(writer, sheet_name='Raw_Data_Detail', index=False)
            
        print(f"\nüíæ  SUKSES! File Benchmark tersimpan: '{output_file}'")
        print("    ‚úÖ Kolom 'Daftar_Unit' berhasil ditambahkan.")
        
        # PREVIEW
        print("\n--- PREVIEW DATA (Kolom Daftar_Unit dipotong agar muat) ---")
        preview_df = df_summary[['Port', 'Jenis_Alat', 'Populasi_Unit', 'Avg_Fuel_Ratio', 'Daftar_Unit']].copy()
        # Potong string daftar unit buat preview di layar aja
        preview_df['Daftar_Unit'] = preview_df['Daftar_Unit'].apply(lambda x: x[:50] + "..." if len(x) > 50 else x)
        print(preview_df.head(10).to_string(index=False))
        
    except Exception as e:
        print(f"‚ö†Ô∏è Gagal simpan Excel: {e}")
        
else:
    print("‚ùå Tidak ada data yang bisa dianalisa. Pastikan Block 2 sudah dijalankan.")

üöÄ [3/3] MEMBUAT LAPORAN BENCHMARK DENGAN DETAIL POPULASI...

üíæ  SUKSES! File Benchmark tersimpan: 'Analisa_Benchmark_Per_Cabang.xlsx'
    ‚úÖ Kolom 'Daftar_Unit' berhasil ditambahkan.

--- PREVIEW DATA (Kolom Daftar_Unit dipotong agar muat) ---
Port    Jenis_Alat  Populasi_Unit  Avg_Fuel_Ratio               Daftar_Unit
 AMB      FORKLIFT              1            2.64                    JATAYU
 BAU      FORKLIFT              1            4.50                GATUT KACA
 BAU REACH STACKER              2           13.69            SANY KUPP, SMV
 BAU       TRAILER              1            2.51                 L 9049 US
 BAU       TRONTON              2            3.43      B 9273 NK, L 8625 UT
 BIA      FORKLIFT              1            0.00 TOBATI (EX FL KALMAR 32T)
 BLW REACH STACKER              1           15.72                 KALMAR 25
 BMS      FORKLIFT              1            3.63                     RANDU
 BMS REACH STACKER              1           13.11                

In [None]:
import numpy as np # Pastikan library ini ada

# ==============================================================================
# BAGIAN 4: ANALISA PERBANDINGAN (MATRIKS EFISIENSI) - LOGIC UPDATE
# ==============================================================================
print("üöÄ [4/4] MEMBUAT MATRIKS PERBANDINGAN ANTAR CABANG...")

if 'df_summary' in locals() and not df_summary.empty:
    
    # 1. PIVOT TABLE
    df_pivot = df_summary.pivot(index='Jenis_Alat', columns='Port', values='Avg_Fuel_Ratio')
    
    # --- REVISI 1: Zero to Null ---
    # Ubah 0 menjadi NaN agar tidak dianggap sebagai "Ratio Terbaik" (karena 0 biasanya error)
    df_pivot = df_pivot.replace(0, np.nan)
    
    # 2. HITUNG BEST (Minimum = Terbaik)
    # idxmin otomatis mengabaikan NaN
    best_branch_series = df_pivot.idxmin(axis=1)
    best_ratio_series = df_pivot.min(axis=1)
    
    # 3. HITUNG WORST (Maksimum = Terboros)
    # --- REVISI 2: Logic Single Branch ---
    # Hitung dulu ada berapa cabang yang punya data valid (tidak NaN) di baris ini
    valid_branch_count = df_pivot.count(axis=1)
    
    # Cari nilai max normal dulu
    worst_branch_series = df_pivot.idxmax(axis=1)
    worst_ratio_series = df_pivot.max(axis=1)
    
    # Jika jumlah cabang valid <= 1, maka TIDAK ADA Worst Branch (Karena dia bermain sendiri)
    # Kita timpa hasilnya dengan NaN atau "-"
    mask_single = valid_branch_count <= 1
    
    # Kita perlu cast ke object/string dulu agar bisa diisi tanda "-"
    worst_branch_series = worst_branch_series.astype(object)
    worst_ratio_series = worst_ratio_series.astype(object)
    
    worst_branch_series[mask_single] = "-"
    worst_ratio_series[mask_single] = "-"
    
    # 4. GABUNGKAN KE TABEL UTAMA
    df_compare = df_pivot.copy()
    
    # Agar tampilan NaN di tabel utama jadi strip (-) biar rapi (Opsional)
    # df_compare = df_compare.fillna("-") 
    
    df_compare['Best_Branch'] = best_branch_series
    df_compare['Best_Ratio'] = best_ratio_series
    df_compare['Worst_Branch'] = worst_branch_series
    df_compare['Worst_Ratio'] = worst_ratio_series
    
    # --- REVISI 3: Hapus Potential Saving Percent (Dihilangkan) ---
    
    # Reset Index
    df_compare.reset_index(inplace=True)
    
    # 5. EXPORT KE EXCEL
    output_file = 'Analisa_Benchmark_Per_Cabang.xlsx'
    
    try:
        with pd.ExcelWriter(output_file, mode='a', if_sheet_exists='replace') as writer:
            df_compare.to_excel(writer, sheet_name='Head_to_Head_Matrix', index=False)
            
        print(f"\nüíæ  SUKSES! Matriks perbandingan tersimpan.")
        print("    -> Ratio 0 dianggap Null (tidak masuk hitungan).")
        print("    -> Jika hanya 1 cabang, Worst Branch dikosongkan.")
        
        # PREVIEW
        # Ambil kolom Port secara dinamis
        port_cols = [c for c in df_pivot.columns]
        cols_view = ['Jenis_Alat'] + port_cols + ['Best_Branch', 'Worst_Branch']
        
        print("\n--- PREVIEW MATRIX (TOP 10) ---")
        # Fillna dengan '-' hanya untuk preview di layar agar enak dilihat
        print(df_compare[cols_view].head(10).fillna("-").to_string(index=False))
        
    except Exception as e:
        print(f"‚ö†Ô∏è Gagal update Excel (Pastikan file tertutup): {e}")

else:
    print("‚ùå Data Summary belum tersedia. Jalankan Block 3 terlebih dahulu.")

üöÄ [4/3] MEMBUAT MATRIKS PERBANDINGAN ANTAR CABANG...

üíæ  SUKSES! Matriks perbandingan tersimpan.
    -> Ratio 0 dianggap Null (tidak masuk hitungan).
    -> Jika hanya 1 cabang, Worst Branch dikosongkan.

--- PREVIEW MATRIX (TOP 10) ---
   Jenis_Alat   AMB    BAU BIA    BLW    BMS   BPN   BRU    JKT   JYP    KAI   KDR    KTG    LUW    MRI   NBR   PAL    PNK   SBY    SDA    SRI    TIM   TRK    TTE   TUA Best_Branch Worst_Branch
     BULDOZER     -      -   -      -      -     -     -      -     -      -     -      -      -      -     -     -      -  0.52      -      -      -     -      -     -         SBY            -
        CRANE     -      -   -      -      -     -     -      -     -   6.57     -      -      -      -     -     -      -  5.89      -      -      -     -      -  6.67         SBY          TUA
     FORKLIFT  2.64    4.5   -      -   3.63  2.67     -   4.48  3.97      -  8.33   2.42  11.26    7.9  3.41     -   5.51  2.86   3.02   3.09   2.55  6.64  11.61  11.7       