In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import warnings

# Silence every warning category for the rest of the kernel session.
warnings.filterwarnings(action='ignore')

# File configuration: the two input workbooks read by the cells below.
file_bbm, file_master = 'BBM AAB.xlsx', 'cost & bbm 2022 sd 2025.xlsx'

print("‚úÖ Library siap. Menggunakan NumPy untuk perhitungan statistik.")

‚úÖ Library siap. Menggunakan NumPy untuk perhitungan statistik.


In [4]:
# --- 1. FUNGSI CLEANING ---
def clean_unit_name(name):
    """Normalise a unit name into a comparison key.

    Missing values (as judged by ``pd.isna``) become the empty string.
    Anything else is converted to text, upper-cased and stripped, has the
    spelling "FORKLIFT" rewritten to "FORKLIF", and finally loses every
    character that is not an ASCII capital letter or a digit.
    """
    if pd.isna(name):
        return ""
    normalized = str(name).upper().strip().replace("FORKLIFT", "FORKLIF")
    # Keep only A-Z and 0-9 so spacing and punctuation variants compare equal.
    return re.sub(r'[^A-Z0-9]', '', normalized)

# --- 2. LOAD MASTER DATA ---
# master_data_map: cleaned unit ID -> {'Unit_Name', 'Jenis', 'Capacity'}.
# master_keys_set: the same cleaned IDs as a set (used for fuzzy matching later).
master_data_map = {}
master_keys_set = set()

if os.path.exists(file_master):
    # header=1: the column titles are on the second row of 'Sheet2'.
    df_map = pd.read_excel(file_master, sheet_name='Sheet2', header=1)

    # Translate the workbook's column titles into the internal names used below.
    if 'NAMA ALAT BERAT' in df_map.columns:
        df_map = df_map.rename(columns={
            'NAMA ALAT BERAT': 'Unit_Original',
            'ALAT BERAT': 'Jenis_Alat',
            'CAP': 'Capacity'
        })

    # Fail loudly when the sheet layout is not the expected one. Without this
    # check a missing 'CAP' column would silently produce an almost empty
    # master map, and the other columns would fail later with a bare KeyError.
    required_cols = ['Unit_Original', 'Jenis_Alat', 'Capacity']
    missing_cols = [c for c in required_cols if c not in df_map.columns]
    if missing_cols:
        raise KeyError(
            f"Kolom wajib tidak ditemukan di Sheet2 file master '{file_master}': {missing_cols}"
        )

    # Drop rows without a unit name and rows flagged as DUMMY. The explicit
    # copy makes the assignments below operate on an independent frame.
    df_map = df_map.dropna(subset=['Unit_Original'])
    is_dummy = df_map['Unit_Original'].astype(str).str.upper().str.contains('DUMMY', na=False)
    df_map = df_map[~is_dummy].copy()
    df_map['Unit_ID'] = df_map['Unit_Original'].apply(clean_unit_name)

    # [MANUAL FIX] Capacity of unit "L 9025 US" is forced to 40.
    fix_id = clean_unit_name("L 9025 US")
    df_map.loc[df_map['Unit_ID'] == fix_id, 'Capacity'] = 40

    # Capacity cleaning: anything non-numeric or missing becomes 0 ...
    df_map['Capacity'] = pd.to_numeric(df_map['Capacity'], errors='coerce').fillna(0)

    # [RULE] ... and units whose capacity is 0 (or negative) are excluded.
    df_map = df_map[df_map['Capacity'] > 0]

    # Store into the dictionary. When two rows clean to the same ID, the
    # later row wins.
    for unit_id, unit_name, jenis, capacity in zip(
        df_map['Unit_ID'], df_map['Unit_Original'], df_map['Jenis_Alat'], df_map['Capacity']
    ):
        if unit_id:
            master_data_map[unit_id] = {
                'Unit_Name': unit_name,
                'Jenis': jenis,
                'Capacity': capacity
            }
    master_keys_set = set(master_data_map)

    print(f"‚úÖ Master Data Terbaca: {len(master_data_map)} unit valid.")
else:
    print("‚ùå File Master tidak ditemukan!")

‚úÖ Master Data Terbaca: 241 unit valid.


In [None]:
raw_monthly_data = []
target_sheets = ['JAN', 'FEB', 'MAR', 'APR', 'MEI', 'JUN', 'JUL', 'AGT', 'SEP', 'OKT', 'NOV']

# Column titles (third row of every sheet) that are read as a metric.
metric_headers = ('HM', 'LITER', 'KELUAR', 'PEMAKAIAN')
# Unit-name prefixes (first row of every sheet) that are never matched.
skipped_prefixes = ('GENSET', 'KOMPRESSOR', 'MESIN', 'TANGKI', 'SPBU', 'MOBIL', 'UNNAMED', 'TOTAL')


def _match_unit_id(raw_unit_name, master_map):
    """Resolve an upper-cased sheet unit name to a key of ``master_map``.

    Matching steps, first hit wins:
      1. hard-coded manual mappings,
      2. exact match on the cleaned name,
      3. the text before the first "(" (only when the name contains " ("),
      4. the text after the last "EX." - exact first, then any master key
         that contains that fragment.

    Returns the matched master key, or None when nothing matches.
    """
    def _lookup(candidate_name):
        # Cleaned ID of candidate_name if the master data knows it, else None.
        candidate_id = clean_unit_name(candidate_name)
        return candidate_id if candidate_id in master_map else None

    matched_id = None

    # [RULE] Manual mapping
    if "FL RENTAL 01" in raw_unit_name and "TIMIKA" not in raw_unit_name:
        matched_id = _lookup("FL RENTAL 01 TIMIKA")
    elif "TOBATI" in raw_unit_name and "KALMAR 32T" in raw_unit_name:
        matched_id = _lookup("TOP LOADER KALMAR 35T/TOBATI")
    elif "L 8477 UUC" in raw_unit_name:
        matched_id = _lookup("L 9902 UR / S75")
    elif "L 9054 UT" in raw_unit_name:
        matched_id = _lookup("L 9054 UT")

    # Automatic exact match
    if not matched_id:
        matched_id = _lookup(raw_unit_name)

    # " (" logic: retry with the part before the bracket
    if not matched_id and " (" in raw_unit_name:
        matched_id = _lookup(raw_unit_name.split("(")[0].strip())

    # "EX." logic: retry with the part after the last "EX."
    if not matched_id and "EX." in raw_unit_name:
        clean_after = clean_unit_name(raw_unit_name.split("EX.")[-1].replace(")", "").strip())
        if clean_after in master_map:
            matched_id = clean_after
        elif clean_after:
            # Several master keys may contain the fragment. Pick the shortest
            # one (ties broken alphabetically) so the result is reproducible;
            # taking the first hit while iterating a set depends on string
            # hash randomisation and can differ between kernel restarts.
            candidates = [k for k in master_map if clean_after in k]
            if candidates:
                matched_id = min(candidates, key=lambda k: (len(k), k))

    return matched_id


if os.path.exists(file_bbm):
    # The context manager closes the workbook handle once all sheets are read.
    with pd.ExcelFile(file_bbm) as xls:
        for sheet in target_sheets:
            if sheet not in xls.sheet_names:
                continue

            print(f"   ... Memproses {sheet}")
            df = pd.read_excel(xls, sheet_name=sheet, header=None)

            # Rows 0 and 2 are required below; skip sheets that are too short.
            if df.shape[0] < 3:
                print(f"   ... {sheet} dilewati (kurang dari 3 baris)")
                continue

            # Row 0 holds the unit names; forward-fill so every column under a
            # unit carries that unit's name. Row 2 holds the column titles.
            unit_names_row = df.iloc[0].ffill()
            headers = df.iloc[2]

            for col in range(1, df.shape[1]):
                header_str = str(headers.iloc[col]).strip().upper()
                if header_str not in metric_headers:
                    continue

                raw_unit_name = str(unit_names_row.iloc[col]).strip().upper()
                if raw_unit_name.startswith(skipped_prefixes):
                    continue

                matched_id = _match_unit_id(raw_unit_name, master_data_map)
                if not matched_id:
                    continue

                # Sum every numeric cell from row 3 downwards; non-numeric
                # cells are coerced to NaN and ignored by sum().
                vals = pd.to_numeric(df.iloc[3:, col], errors='coerce').sum()
                if vals > 0:
                    # NOTE(review): 'LITER', 'KELUAR' and 'PEMAKAIAN' are all
                    # stored as metric 'LITER' and later summed by the pivot.
                    # If one unit has more than one of these columns in a
                    # sheet the litres add up - confirm that is intended.
                    metric = 'HM' if header_str == 'HM' else 'LITER'
                    raw_monthly_data.append({
                        'Bulan': sheet,
                        'Unit_ID': matched_id,
                        'Unit_Name': master_data_map[matched_id]['Unit_Name'],
                        'Metric': metric,
                        'Value': vals
                    })
else:
    print(f"File BBM tidak ditemukan: {file_bbm}")

# Pivot the monthly data: one row per unit and month, one column per metric.
df_raw = pd.DataFrame(raw_monthly_data)
if not df_raw.empty:
    df_monthly = df_raw.pivot_table(index=['Unit_ID', 'Unit_Name', 'Bulan'], columns='Metric', values='Value', aggfunc='sum').reset_index()
    # Guarantee both metric columns exist even when only one metric was ever
    # matched, so later cells can rely on 'HM' and 'LITER'.
    for metric_col in ('HM', 'LITER'):
        if metric_col not in df_monthly.columns:
            df_monthly[metric_col] = 0.0
    df_monthly = df_monthly.fillna(0)
    print(f"‚úÖ Data Bulanan Siap: {len(df_monthly)} baris data.")
else:
    # Define an empty frame so later cells do not pick up a stale df_monthly
    # left over from an earlier run of the kernel.
    df_monthly = pd.DataFrame(columns=['Unit_ID', 'Unit_Name', 'Bulan', 'HM', 'LITER'])
    print("‚ùå Tidak ada data transaksi yang cocok.")

   ... Memproses JAN
   ... Memproses FEB
   ... Memproses MAR
   ... Memproses APR
   ... Memproses MEI
   ... Memproses JUN
   ... Memproses JUL
   ... Memproses AGT
   ... Memproses SEP
   ... Memproses OKT
   ... Memproses NOV
‚úÖ Data Bulanan Siap: 2548 baris data.


: 

In [None]:
regression_results = []
min_months = 3            # minimum number of valid months needed to fit a unit
r2_valid_threshold = 0.5  # R2 above this value labels the model 'Valid'

# Explicit column list so df_regresi has its columns even with zero results.
result_columns = [
    'Unit_Name', 'Data_Points', 'Intercept_Base_Load', 'Slope_Liter_Per_HM',
    'Average_Ratio_Manual', 'R2_Score_Accuracy', 'Status_Model'
]

units = df_monthly['Unit_Name'].unique()

print(f"üìä Melakukan Analisa Regresi pada {len(units)} Unit (Metode NumPy)...")

# Valid-row filter: only months with both HM and LITER above zero are used.
valid_rows = df_monthly[(df_monthly['HM'] > 0) & (df_monthly['LITER'] > 0)]

for unit in units:
    df_u = valid_rows[valid_rows['Unit_Name'] == unit]

    if len(df_u) < min_months:
        continue

    x_val = df_u['HM'].values
    y_val = df_u['LITER'].values

    # A line cannot be fitted when every HM value is identical: the slope is
    # undetermined and np.polyfit only emits a (globally silenced) warning.
    if x_val.min() == x_val.max():
        continue

    # --- LINEAR REGRESSION WITH NUMPY ---
    # Degree 1 = linear (straight line: y = m*x + c)
    # slope = m (consumption per hour), intercept = c (fixed cost)
    slope, intercept = np.polyfit(x_val, y_val, 1)

    # Predictions on the training points, used to score the fit.
    predict_func = np.poly1d([slope, intercept])
    y_pred = predict_func(x_val)

    # R2 computed by hand:
    # R2 = 1 - (sum of squared residuals / total sum of squares)
    ss_res = np.sum((y_val - y_pred) ** 2)
    ss_tot = np.sum((y_val - np.mean(y_val)) ** 2)

    # With zero variance in y the ratio is undefined; report 0 instead.
    r2 = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0

    # Plain average: total litres divided by total HM over the valid months.
    avg_ratio = df_u['LITER'].sum() / df_u['HM'].sum()

    regression_results.append({
        'Unit_Name': unit,
        'Data_Points': len(df_u),
        'Intercept_Base_Load': intercept,
        'Slope_Liter_Per_HM': slope,
        'Average_Ratio_Manual': avg_ratio,
        'R2_Score_Accuracy': r2,
        'Status_Model': 'Valid' if r2 > r2_valid_threshold else 'Data Acak/Tidak Konsisten'
    })

# Passing the columns keeps sort_values from raising KeyError when no unit
# had enough valid months.
df_regresi = pd.DataFrame(regression_results, columns=result_columns)
df_regresi = df_regresi.sort_values('R2_Score_Accuracy', ascending=False)

print("‚úÖ Selesai. Contoh 5 Unit dengan Pola Paling Konsisten:")
print(df_regresi[['Unit_Name', 'Slope_Liter_Per_HM', 'R2_Score_Accuracy']].head())

In [None]:
# Write the regression summary to an Excel workbook (index column omitted).
output_regresi = 'Hasil_Analisa_Regresi_Unit.xlsx'
df_regresi.to_excel(output_regresi, index=False)
print(f"üíæ Hasil Analisa disimpan ke: {output_regresi}")

# --- SAMPLE VISUALISATION ---
# First three rows of df_regresi, and the last three among rows with R2 > 0.
top_units = df_regresi['Unit_Name'].head(3).tolist()
positive_r2_names = df_regresi.loc[df_regresi['R2_Score_Accuracy'] > 0, 'Unit_Name']
bottom_units = positive_r2_names.tail(3).tolist()

def plot_regression(unit_name):
    """Scatter one unit's monthly HM vs. LITER with its fitted regression line.

    Only rows of ``df_monthly`` for ``unit_name`` where both HM and LITER are
    above zero are used. Nothing is drawn when fewer than two such rows exist
    or when all HM values are identical (no line can be fitted).

    The title shows the slope, the intercept and the R2 of the fit. R2 is
    reported as 0 when LITER has no variance, matching the guarded value
    stored in the regression results table.
    """
    df_u = df_monthly[(df_monthly['Unit_Name'] == unit_name) & (df_monthly['HM'] > 0) & (df_monthly['LITER'] > 0)]

    if len(df_u) < 2:
        return

    x_val = df_u['HM'].values
    y_val = df_u['LITER'].values

    # Identical HM values leave the slope undetermined; skip the plot.
    if x_val.min() == x_val.max():
        return

    # Refit the model for plotting.
    slope, intercept = np.polyfit(x_val, y_val, 1)
    predict_func = np.poly1d([slope, intercept])

    # R2 = 1 - SS_res / SS_tot, guarded against a zero denominator so the
    # title never shows nan / -inf.
    ss_res = np.sum((y_val - predict_func(x_val)) ** 2)
    ss_tot = np.sum((y_val - np.mean(y_val)) ** 2)
    r2 = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0

    # Smooth prediction line across the observed HM range.
    x_line = np.linspace(x_val.min(), x_val.max(), 100)
    y_line = predict_func(x_line)

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.scatter(x_val, y_val, color='blue', label='Data Aktual (Bulan)')
    ax.plot(x_line, y_line, color='red', linewidth=2, label='Garis Regresi (Prediksi Normal)')

    ax.set_title(f"Regresi BBM: {unit_name}\n(Konsumsi: {slope:.2f} L/Jam + {intercept:.2f} L Fixed)\nAkurasi (R2): {r2:.2f}")
    ax.set_xlabel("Total HM (Jam Kerja)")
    ax.set_ylabel("Total Liter BBM")
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.6)
    plt.show()

# Each heading is printed once, followed by one chart per unit in its group.
sample_groups = (
    ("\n--- GRAFIK UNIT DENGAN POLA KONSUMSI SANGAT KONSISTEN (SEHAT) ---", top_units),
    ("\n--- GRAFIK UNIT DENGAN POLA KONSUMSI TIDAK TERATUR (PERLU CEK) ---", bottom_units),
)

for heading, unit_names in sample_groups:
    print(heading)
    for unit_name in unit_names:
        plot_regression(unit_name)