In [3]:
import pandas as pd
import csv
import gc # Garbage Collector (Pembersih Memori)
import matplotlib.pyplot as plt
import seaborn as sns
from python_calamine import CalamineWorkbook # Engine Rust

# Konfigurasi Nama File
excel_filename = 'BBM AAB.xlsx'       # File Excel Raksasa Anda
temp_csv_filename = 'temp_bbm_rust.csv' # File CSV sementara (Buffer)

print("✅ Library berhasil di-load. Siap memproses data.")

✅ Library berhasil di-load. Siap memproses data.


In [4]:
def convert_excel_to_csv_rust(input_file, output_csv):
    print(f"--- MEMULAI ENGINE RUST (CALAMINE) ---")
    
    try:
        # 1. Buka File Excel (Memory Mapping, sangat ringan)
        wb = CalamineWorkbook.from_path(input_file)
        print(f"File berhasil dibuka. Sheet yang ditemukan: {wb.sheet_names}")
    except Exception as e:
        print(f"❌ Gagal membuka file: {e}")
        return False

    # 2. Siapkan File CSV
    with open(output_csv, mode='w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # Tulis Header Kolom untuk CSV
        writer.writerow(['Date', 'Month', 'Unit', 'Metric', 'Value'])
        
        total_rows = 0
        
        # 3. Loop per Sheet
        for sheet_name in wb.sheet_names:
            print(f"Memproses Sheet: {sheet_name}...", end=" ", flush=True)
            
            try:
                # Ambil data sheet sebagai List of Lists (Sangat Cepat)
                rows = wb.get_sheet_by_name(sheet_name).to_python()
                
                if not rows or len(rows) < 3:
                    print("Skipped (Sheet Kosong/Header rusak).")
                    continue

                # --- LOGIKA MAPPING HEADER (Manual) ---
                # Baris 0 = Nama Unit | Baris 2 = Metrik (HM/Liter)
                header_units = rows[0]
                header_metrics = rows[2]
                
                valid_columns = [] # Menyimpan index kolom yang valid
                current_unit = None
                
                # Loop menyamping (Kolom) untuk mendeteksi Header
                for idx in range(len(header_units)):
                    # Ambil Nama Unit (Handle Merged Cells)
                    val_unit = header_units[idx]
                    if val_unit is not None:
                        current_unit = str(val_unit).strip()
                    
                    # Ambil Metrik
                    val_metric = header_metrics[idx] if idx < len(header_metrics) else None
                    
                    # Jika Unit dan Metrik ada, simpan index ini
                    if current_unit and val_metric:
                        metric_clean = str(val_metric).strip().upper()
                        if metric_clean in ['HM', 'KM', 'LITER', 'KELUAR', 'MASUK']:
                            valid_columns.append((idx, current_unit, metric_clean))
                
                if not valid_columns:
                    print("Tidak ada kolom HM/LITER valid.")
                    continue
                
                # --- LOGIKA STREAMING DATA (Mulai Baris 3) ---
                sheet_row_count = 0
                empty_streak = 0
                
                # Loop ke bawah (Baris Data)
                for row_idx in range(3, len(rows)):
                    row_data = rows[row_idx]
                    
                    # Cek Kolom Tanggal (Index 0)
                    if not row_data or row_data[0] is None:
                        empty_streak += 1
                        if empty_streak > 10: break # Stop jika 10 baris kosong
                        continue
                    else:
                        empty_streak = 0
                        date_val = row_data[0]
                    
                    # Ambil Data dari Kolom Valid saja
                    for col_idx, unit, metric in valid_columns:
                        if col_idx < len(row_data):
                            val = row_data[col_idx]
                            
                            # Hanya tulis jika ada nilai (Bukan None/0) -> Hemat Disk Space
                            if val is not None and val != 0 and val != '':
                                writer.writerow([date_val, sheet_name, unit, metric, val])
                                sheet_row_count += 1
                                total_rows += 1
                
                print(f"OK (+{sheet_row_count} data)")
                
                # Bersihkan memori segera
                del rows
                gc.collect()

            except Exception as e:
                print(f"Error di sheet {sheet_name}: {e}")
                
    print(f"\n✅ SUKSES BESAR! Total {total_rows} baris data tersimpan di CSV.")
    return True

In [5]:
# Jalankan Konversi
status = convert_excel_to_csv_rust(excel_filename, temp_csv_filename)

--- MEMULAI ENGINE RUST (CALAMINE) ---
File berhasil dibuka. Sheet yang ditemukan: ['JAN', 'FEB', 'MAR', 'APR', 'MEI', 'JUN', 'JUL', 'AGT', 'SEP', 'OKT', 'NOV']
Memproses Sheet: JAN... OK (+7757 data)
Memproses Sheet: FEB... OK (+7043 data)
Memproses Sheet: MAR... OK (+7819 data)
Memproses Sheet: APR... OK (+7591 data)
Memproses Sheet: MEI... OK (+7876 data)
Memproses Sheet: JUN... OK (+7636 data)
Memproses Sheet: JUL... OK (+7903 data)
Memproses Sheet: AGT... OK (+7840 data)
Memproses Sheet: SEP... OK (+7597 data)
Memproses Sheet: OKT... OK (+7795 data)
Memproses Sheet: NOV... OK (+7534 data)

✅ SUKSES BESAR! Total 84391 baris data tersimpan di CSV.


In [6]:
if status:
    print("Membaca data dari CSV ke Pandas...")
    
    # Baca CSV
    df = pd.read_csv(temp_csv_filename)
    
    # Rapikan Format Tanggal & Angka
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    df['Value'] = pd.to_numeric(df['Value'], errors='coerce')
    
    # Hapus data yang tanggal/nilainya rusak
    df.dropna(subset=['Date', 'Value'], inplace=True)
    
    print(f"✅ Data Frame Siap! Ukuran: {df.shape[0]} baris.")
    print(df.head())
else:
    print("❌ Stop. Proses konversi di Cell sebelumnya gagal.")

Membaca data dari CSV ke Pandas...
✅ Data Frame Siap! Ukuran: 31756 baris.
        Date Month        Unit Metric    Value
0 2025-01-01   JAN       LINDE     HM  16280.0
1 2025-01-01   JAN   KALMAR 32     HM  21492.0
2 2025-01-01   JAN  KONECRANES     HM   4006.0
3 2025-01-01   JAN         SMV     HM   7528.0
4 2025-01-01   JAN   KALMAR 20     HM   2093.0


In [7]:
if 'df' in locals():
    print("Sedang melakukan Pivot Table...")
    
    # Pivot Data
    df_tidy = df.pivot_table(
        index=['Date', 'Month', 'Unit'], 
        columns='Metric', 
        values='Value', 
        aggfunc='sum'
    ).reset_index()
    
    # Isi NaN dengan 0 (agar bisa dihitung matematikanya)
    for col in ['HM', 'LITER', 'KELUAR']:
        if col not in df_tidy.columns: df_tidy[col] = 0
        else: df_tidy[col] = df_tidy[col].fillna(0)
        
    # Buat Kategori (Alat Berat vs Storage)
    def get_category(name):
        name = str(name).upper()
        if any(x in name for x in ['TANGKI', 'SPBU', 'BUNKER', 'GENSET']): return 'Storage'
        elif any(x in name for x in ['KALMAR', 'LINDE', 'CRANE', 'SMV', 'KONECRANES']): return 'Alat Berat'
        elif name.startswith('L ') or name.startswith('B '): return 'Truk'
        else: return 'Lainnya'

    df_tidy['Category'] = df_tidy['Unit'].apply(get_category)
    
    # --- HITUNG EFISIENSI (LPH) ---
    # Sortir dulu biar urut tanggal
    df_tidy.sort_values(by=['Unit', 'Date'], inplace=True)
    
    # Hitung Selisih HM (Jam Kerja Hari Ini - Kemarin)
    df_tidy['Prev_HM'] = df_tidy.groupby('Unit')['HM'].shift(1)
    df_tidy['Delta_HM'] = df_tidy['HM'] - df_tidy['Prev_HM']
    
    # Bersihkan Data HM yang aneh (Minus atau > 24 jam sehari)
    df_tidy.loc[(df_tidy['Delta_HM'] < 0) | (df_tidy['Delta_HM'] > 24), 'Delta_HM'] = pd.NA
    
    # Rumus Liter Per Hour
    df_tidy['LPH'] = df_tidy['LITER'] / df_tidy['Delta_HM']
    
    print("✅ Analisa Perhitungan Selesai.")

Sedang melakukan Pivot Table...
✅ Analisa Perhitungan Selesai.


In [8]:
if 'df_tidy' in locals():
    # Filter Data Khusus Alat Berat
    alat_berat = df_tidy[df_tidy['Category'] == 'Alat Berat']
    
    # --- VISUALISASI 1: Boxplot Efisiensi (Cari Alat Boros) ---
    plt.figure(figsize=(12, 6))
    
    # Ambil Top 15 Unit Paling Aktif (Biar grafik tidak penuh sesak)
    top_15_units = alat_berat.groupby('Unit')['LITER'].sum().nlargest(15).index
    plot_data = alat_berat[alat_berat['Unit'].isin(top_15_units)]
    
    sns.boxplot(data=plot_data, x='Unit', y='LPH')
    plt.title('Distribusi Efisiensi BBM (Liter/Jam) - Top 15 Unit')
    plt.ylabel('Liter / Jam (LPH)')
    plt.xticks(rotation=45, ha='right')
    plt.ylim(0, 60) # Zoom grafik (0 sd 60 Liter/Jam)
    plt.grid(axis='y', linestyle='--', alpha=0.5)
    plt.show() # Tampilkan Gambar
    
    # --- VISUALISASI 2: Rekonsiliasi (Gudang vs Unit) ---
    total_out = df_tidy[df_tidy['Category'] == 'Storage'].groupby('Month')['KELUAR'].sum()
    total_in = df_tidy[df_tidy['Category'] != 'Storage'].groupby('Month')['LITER'].sum()
    
    recon_df = pd.DataFrame({'Keluar Gudang': total_out, 'Masuk Unit': total_in})
    
    # Urutkan Bulan
    urutan_bulan = ['JAN', 'FEB', 'MAR', 'APR', 'MEI', 'JUN', 'JUL', 'AGT', 'SEP', 'OKT', 'NOV']
    recon_df = recon_df.reindex(urutan_bulan)
    
    print("\n=== TABEL REKONSILIASI STOK ===")
    print(recon_df)
    
    recon_df.plot(kind='bar', figsize=(10, 5), color=['red', 'green'])
    plt.title("Rekonsiliasi: BBM Keluar Gudang vs BBM Masuk Unit")
    plt.ylabel("Total Liter")
    plt.xticks(rotation=45)
    plt.show()

: 