In [11]:
import pandas as pd
import glob
import os
import warnings

warnings.filterwarnings('ignore')

# BAGIAN 1: LOAD DATA CABANG (REKURSIF) + SIMPAN LOKASI FILE
print("MEMULAI PROSES LOAD DATA CABANG...")

# 1. Konfigurasi Folder
base_folder = "Data Setiap Cabang"

# Cek Folder
if not os.path.exists(base_folder):
    print(f"ERROR: Folder '{base_folder}' tidak ditemukan.")
    # Fallback: Cari di folder sekarang jika folder khusus tidak ada
    branch_files = glob.glob("*Rpt HMU*.csv") + glob.glob("*Rpt HMU*.xlsx")
else:
    # Cari di dalam sub-folder
    search_pattern_csv = os.path.join(base_folder, "**", "*Rpt HMU*.csv")
    search_pattern_xlsx = os.path.join(base_folder, "**", "*Rpt HMU*.xlsx")
    branch_files = glob.glob(search_pattern_csv, recursive=True) + glob.glob(search_pattern_xlsx, recursive=True)

print(f"Ditemukan Total {len(branch_files)} file cabang.")

list_ops = []
unit_source_map = {} 

# 2. Proses File
for f in branch_files:
    try:
        # Nama File Bersih untuk Tracking
        basename = os.path.basename(f)
        clean_filename = basename.replace(' - form2Strokes.csv', '').replace('.xlsx', '').replace('.csv', '')
        
        # Baca File
        if f.endswith('.csv'):
            df = pd.read_csv(f)
        else:
            df = pd.read_excel(f, engine='openpyxl')
            
        # Standardisasi Kolom
        df.columns = [str(c).strip().upper() for c in df.columns]
        
        # Cek Kolom
        if 'EQUIP NAME' in df.columns and 'HMU' in df.columns:
            date_cols = [c for c in df.columns if 'TGL' in c or 'DATE' in c]
            if not date_cols: continue
            
            target_date_col = date_cols[0]
            
            # Ambil Data
            df_subset = df[['EQUIP NAME', target_date_col, 'HMU']].copy()
            df_subset['Date'] = pd.to_datetime(df_subset[target_date_col], dayfirst=True, errors='coerce')
            df_subset['HMU'] = pd.to_numeric(df_subset['HMU'], errors='coerce')
            
            # Tambahkan Kolom Lokasi File
            df_subset['Source_File'] = clean_filename 
            
            # Bersihkan Nama Unit
            df_subset['Unit_Clean'] = df_subset['EQUIP NAME'].astype(str).str.upper().str.strip()
            df_subset['Unit_Clean'] = df_subset['Unit_Clean'].apply(lambda x: " ".join(x.split()))
            
            # Filter Data Valid (Tahun 2025)
            df_subset = df_subset.dropna(subset=['Date', 'HMU'])
            df_subset = df_subset[df_subset['Date'].dt.year == 2025]
            
            if not df_subset.empty:
                # Simpan kolom Source_File juga
                list_ops.append(df_subset[['Unit_Clean', 'Date', 'HMU', 'Source_File']])
                
                # Update Map untuk Analisa Eksistensi
                for u in df_subset['Unit_Clean'].unique():
                    if u not in unit_source_map:
                        unit_source_map[u] = set()
                    unit_source_map[u].add(clean_filename)

    except Exception as e:
        print(f"Gagal membaca {basename}: {e}")

# 3. Gabungkan Data & Aggregasi
if list_ops:
    df_ops_all = pd.concat(list_ops, ignore_index=True)
    df_ops_all['Month'] = df_ops_all['Date'].dt.to_period('M')
    
    # Aggregasi: 
    # - HMU: Ambil nilai Maksimum (Akhir Bulan)
    # - Source_File: Ambil nama filenya (First) agar bisa ditrack
    ops_monthly = df_ops_all.groupby(['Unit_Clean', 'Month']).agg({
        'HMU': 'max',
        'Source_File': 'first'
    }).reset_index()
    
    ops_monthly.rename(columns={'HMU': 'HM_Ops_Max'}, inplace=True)
    
    del df_ops_all
    del list_ops
    
    print(f"DATA CABANG SELESAI DIPROSES.")
    print(f"- Total Unit Teridentifikasi: {len(unit_source_map)}")
else:
    print("TIDAK ADA DATA CABANG YANG TERBACA.")
    ops_monthly = pd.DataFrame()

MEMULAI PROSES LOAD DATA CABANG...
Ditemukan Total 14 file cabang.
DATA CABANG SELESAI DIPROSES.
- Total Unit Teridentifikasi: 245


In [12]:
# BAGIAN 2: LOAD DATA BBM (AAB)
print("MEMULAI PROSES LOAD DATA BBM...")

# Cari file BBM (Bisa .xlsx utama atau pecahan .csv)
bbm_files = glob.glob("BBM AAB.xlsx") + glob.glob("BBM AAB.xlsx - *.csv")
bbm_data = []
units_bbm = set()
unit_bbm_source_map = {} 

month_map = {
    'JAN': '2025-01', 'FEB': '2025-02', 'MAR': '2025-03', 'APR': '2025-04',
    'MEI': '2025-05', 'JUN': '2025-06', 'JUL': '2025-07', 'AGT': '2025-08',
    'SEP': '2025-09', 'OKT': '2025-10', 'NOV': '2025-11', 'DES': '2025-12'
}

for f in bbm_files:
    try:
        # Cek apakah ini file XLSX utuh atau CSV pecahan
        if f.endswith('.xlsx'):
            # Logic untuk XLSX Multi-sheet
            xls = pd.ExcelFile(f)
            sheet_names = xls.sheet_names
        else:
            # Logic untuk CSV pecahan (BBM AAB.xlsx - JAN.csv)
            basename = os.path.basename(f)
            sheet_name_from_file = basename.split(' - ')[-1].replace('.csv', '').strip()
            sheet_names = [sheet_name_from_file] # Treat as list of 1 sheet
        
        for sheet in sheet_names:
            if sheet in month_map:
                period = pd.Period(month_map[sheet])
                
                # Baca Data
                if f.endswith('.xlsx'):
                    df_raw = pd.read_excel(xls, sheet_name=sheet, header=None)
                else:
                    df_raw = pd.read_csv(f, header=None)
                
                # Baris 0: Nama Unit (Forward Fill)
                row_units = df_raw.iloc[0].fillna(method='ffill')
                # Baris 2: Header Kolom
                row_headers = df_raw.iloc[2]
                
                # Cari Kolom HM
                hm_indices = [i for i, x in enumerate(row_headers) if str(x).strip().upper() == 'HM']
                
                for idx in hm_indices:
                    unit_name = str(row_units[idx]).strip().upper()
                    
                    # Filter
                    invalid = ['NAN', 'UNNAMED', 'EQUIP NAME', 'TANGGAL', 'GROUP KPI', 'NAT', 'TOTAL']
                    if not any(x in unit_name for x in invalid):
                        clean_name = " ".join(unit_name.split())
                        units_bbm.add(clean_name)
                        
                        # Track source
                        if clean_name not in unit_bbm_source_map: unit_bbm_source_map[clean_name] = set()
                        unit_bbm_source_map[clean_name].add(sheet)
                        
                        # Ambil HM
                        hm_vals = pd.to_numeric(df_raw.iloc[3:, idx], errors='coerce').dropna()
                        if not hm_vals.empty:
                            max_hm = hm_vals.max()
                            if max_hm > 0:
                                bbm_data.append({
                                    'Unit_Clean': clean_name,
                                    'Month': period,
                                    'HM_BBM_Max': max_hm
                                })

    except Exception as e:
        print(f"Error membaca {f}: {e}")

if bbm_data:
    df_bbm_monthly = pd.DataFrame(bbm_data)
    print(f"DATA BBM SELESAI DIPROSES.")
    print(f"- Total Unit Unik BBM: {len(units_bbm)}")
else:
    df_bbm_monthly = pd.DataFrame()
    print("Data BBM Kosong.")

MEMULAI PROSES LOAD DATA BBM...
DATA BBM SELESAI DIPROSES.
- Total Unit Unik BBM: 300


In [13]:
# BAGIAN 3: ANALISA EKSISTENSI (ABSENSI UNIT)
print("MENJALANKAN ANALISA 1: Cek Eksistensi Unit...")

all_branch_units = set(unit_source_map.keys())
all_bbm_units = units_bbm

# A. Unit di Cabang tapi TIDAK ADA di BBM
branch_only_units = sorted(list(all_branch_units - all_bbm_units))

# B. Unit di BBM tapi TIDAK ADA di Cabang
bbm_only_units = sorted(list(all_bbm_units - all_branch_units))

# Print Hasil
print(f"\nUnit Cabang yang TIDAK ADA di BBM: {len(branch_only_units)} Unit")
branch_only_data = []
for u in branch_only_units:
    files = ", ".join(sorted(list(unit_source_map[u])))
    print(f"   - {u} (Sumber File: {files})")
    branch_only_data.append({'Unit': u, 'Found_In_File': files})

print(f"\nUnit BBM yang TIDAK ADA di Cabang: {len(bbm_only_units)} Unit")
bbm_only_data = []
for u in bbm_only_units[:10]: # Print sample 10 aja
    months = ", ".join(sorted(list(unit_bbm_source_map[u])))
    print(f"   - {u} (Bulan: {months})")
    bbm_only_data.append({'Unit': u, 'Found_In_Month': months})

# Masukkan sisa data BBM ke list excel
for u in bbm_only_units[10:]:
    months = ", ".join(sorted(list(unit_bbm_source_map[u])))
    bbm_only_data.append({'Unit': u, 'Found_In_Month': months})

MENJALANKAN ANALISA 1: Cek Eksistensi Unit...

Unit Cabang yang TIDAK ADA di BBM: 3 Unit
   - DUMMY (JANGAN DIPAKAI) (Sumber File: JKT Rpt HMU 01-10-2025-31-10-2025, SBY Rpt HMU 01-10-2025-02-01-2026)
   - DUMMY BUNKER (JANGAN DIPAKAI) (Sumber File: JKT Rpt HMU 01-10-2025-31-10-2025, SBY Rpt HMU 01-10-2025-02-01-2026)
   - FORKLIF MITS/KENANGA (Sumber File: JKT Rpt HMU 01-01-2025-31-01-2025, JKT Rpt HMU 01-02-2025-28-02-2025, JKT Rpt HMU 01-03-2025-31-03-2025, JKT Rpt HMU 01-04-2025-30-04-2025, JKT Rpt HMU 01-05-2025-31-05-2025, JKT Rpt HMU 01-06-2025-30-06-2025, JKT Rpt HMU 01-07-2025-31-07-2025, JKT Rpt HMU 01-08-2025-31-08-2025, JKT Rpt HMU 01-09-2025-30-09-2025, JKT Rpt HMU 01-10-2025-31-10-2025, JKT Rpt HMU 01-11-2025-30-11-2025, JKT Rpt HMU 01-12-2025-31-12-2025, SBY Rpt HMU 01-10-2025-02-01-2026)

Unit BBM yang TIDAK ADA di Cabang: 58 Unit
   - CONVEY01 (Bulan: AGT, APR, FEB, JAN, JUL, JUN, MAR, MEI, NOV, OKT, SEP)
   - CRANE P&H 127 T/KOMODO (Bulan: AGT, APR, FEB, JAN, JUL, JUN

In [16]:
# BAGIAN 4: ANALISA HM & EXPORT EXCEL (UPDATE LOKASI FILE)
print("\nMENJALANKAN ANALISA 2: Perbandingan HM Bulanan...")

if not ops_monthly.empty and not df_bbm_monthly.empty:
    # Gabungkan data
    df_compare = pd.merge(ops_monthly, df_bbm_monthly, on=['Unit_Clean', 'Month'], how='outer')
    
    # Hitung Selisih
    df_compare['Selisih'] = df_compare['HM_Ops_Max'] - df_compare['HM_BBM_Max']
    
    # Rapikan Kolom: Ganti NaN pada Source_File dengan '-'
    if 'Source_File' in df_compare.columns:
        df_compare['Source_File'].fillna('-', inplace=True)
        # Rename agar lebih jelas di Excel
        df_compare.rename(columns={'Source_File': 'Lokasi_File_Cabang'}, inplace=True)
    
    # Cari Anomali
    anomalies = df_compare[df_compare['Selisih'] < -10].copy()
    anomalies.sort_values('Selisih', inplace=True)
    
    print(f"-> Total Data Perbandingan: {len(df_compare)} baris")
    print(f"-> Ditemukan {len(anomalies)} Anomali")
    
    if not anomalies.empty:
        # Pastikan kolom Lokasi File ikut di-print di preview
        cols_to_show = ['Unit_Clean', 'Month', 'HM_Ops_Max', 'HM_BBM_Max', 'Selisih']
        if 'Lokasi_File_Cabang' in anomalies.columns:
            cols_to_show.append('Lokasi_File_Cabang')
            
        print("\nTOP 5 ANOMALI HM TERBESAR:")
        print(anomalies[cols_to_show].head(5))
else:
    df_compare = pd.DataFrame()
    anomalies = pd.DataFrame()

# EXPORT KE EXCEL
output_file = 'Analisa_Cabang.xlsx'
try:
    with pd.ExcelWriter(output_file) as writer:
        if branch_only_data:
            pd.DataFrame(branch_only_data).to_excel(writer, sheet_name='Missing_In_BBM', index=False)
        
        if bbm_only_data:
            pd.DataFrame(bbm_only_data).to_excel(writer, sheet_name='Missing_In_Branch', index=False)
            
        if not df_compare.empty:
            # Pastikan kolom Lokasi_File_Cabang ada di urutan yang enak dilihat
            cols = list(df_compare.columns)
            # Pindahkan Lokasi_File_Cabang ke setelah Unit_Clean jika ada
            if 'Lokasi_File_Cabang' in cols:
                cols.insert(2, cols.pop(cols.index('Lokasi_File_Cabang')))
                df_compare = df_compare[cols]
                
            df_compare.to_excel(writer, sheet_name='HM_Comparison_Detail', index=False)
            
        if not anomalies.empty:
            anomalies.to_excel(writer, sheet_name='HM_Anomalies_Alert', index=False)
            
    print(f"\nFile hasil analisa tersimpan sebagai: '{output_file}'")
except Exception as e:
    print(f"\nGagal menyimpan file Excel: {e}")


MENJALANKAN ANALISA 2: Perbandingan HM Bulanan...
-> Total Data Perbandingan: 2908 baris
-> Ditemukan 0 Anomali

File hasil analisa tersimpan sebagai: 'Analisa_Cabang.xlsx'
