In [1]:
import pandas as pd
from datetime import timedelta
import numpy as np
from caoa_solver import run_priority_simulation
from joblib import Parallel, delayed
import multiprocessing

In [2]:
# Voyage file to simulate and destination of the optimised schedule; both are
# read as globals by the cells below.
# NOTE(review): `input` shadows Python's built-in input(). Later cells refer to
# this global by that name, so a rename has to be done across the whole notebook.
input = 'split_by_month/Voyage_Data_2025_01.csv'
output = 'results_file/current/optimized_schedule.csv'

In [3]:
# 1. Load data
def load_data(voyage_file=None, port_file='port_data.csv'):
    """Read the voyage schedule and build the port capacity lookup.

    Args:
        voyage_file: Path of the voyage CSV. When omitted, the notebook-level
            ``input`` variable is used, which is what the original
            zero-argument call did.
        port_file: Path of the port CSV. It must provide the columns
            ``Nama_Pelabuhan`` and ``Total_Berths``.

    Returns:
        tuple: ``(voyages_df, port_capacity)`` where ``voyages_df`` has
        ``ETA_Planned`` parsed to datetimes and ``port_capacity`` maps each
        ``Nama_Pelabuhan`` value to its ``Total_Berths`` value.

    Raises:
        ValueError: If no voyage file is configured, i.e. ``input`` still
            refers to Python's built-in function because the configuration
            cell has not been run.
    """
    if voyage_file is None:
        voyage_file = input
    if callable(voyage_file):
        # `input` is the builtin unless the configuration cell redefined it.
        raise ValueError(
            "No voyage file configured: pass voyage_file or run the cell that sets `input`."
        )

    ports_df = pd.read_csv(port_file)
    voyages_df = pd.read_csv(voyage_file)

    # Convert the date strings to datetime objects
    voyages_df['ETA_Planned'] = pd.to_datetime(voyages_df['ETA_Planned'])

    # Port capacity dictionary, e.g. {'TANJUNG PRIOK': 3, 'SURABAYA': 2, ...}
    port_capacity = dict(zip(ports_df['Nama_Pelabuhan'], ports_df['Total_Berths']))

    return voyages_df, port_capacity

# 2. Class that tracks port (berth) state
class PortManager:
    """Tracks, per port, the time at which each berth next becomes free."""

    def __init__(self, capacity_dict):
        """Create one free-time slot per berth for every port.

        Args:
            capacity_dict: Mapping of port name to number of berths.
        """
        # Layout: {'PRIOK': [free_time_berth_1, free_time_berth_2, ...]}
        # pd.Timestamp.min marks a berth that has not been used yet.
        self.berths = {p: [pd.Timestamp.min] * cap for p, cap in capacity_dict.items()}

    def request_berthing(self, port_name, arrival_time, service_duration):
        """Assign the earliest-free berth of a port to an arriving ship.

        Args:
            port_name: Port being called at. A port missing from the capacity
                data is registered on first use with a single berth.
            arrival_time: Time the ship reaches the port.
            service_duration: Time at berth, in hours. Any numeric type
                accepted by ``pd.to_timedelta`` works, including numpy
                integers (``datetime.timedelta`` rejects those).

        Returns:
            tuple: ``(actual_berthing_time, departure_time, waiting_time)``
            with ``waiting_time`` in hours between arrival and berthing.
        """
        if port_name not in self.berths:
            # Port absent from the capacity data: assume one berth
            self.berths[port_name] = [pd.Timestamp.min]

        available_slots = self.berths[port_name]
        available_slots.sort()  # earliest-free berth first

        earliest_free_time = available_slots[0]

        # The ship berths at max(arrival, moment the berth becomes free)
        actual_berthing_time = max(arrival_time, earliest_free_time)

        # Queueing delay in hours
        waiting_time = (actual_berthing_time - arrival_time).total_seconds() / 3600.0

        # The berth stays occupied until the ship departs
        departure_time = actual_berthing_time + pd.to_timedelta(service_duration, unit='h')
        available_slots[0] = departure_time

        return actual_berthing_time, departure_time, waiting_time

# 3. Main simulation function (decoder)
def run_simulation(voyages_df, port_capacity):
    """Simulate the schedule first-come-first-served, ordered by planned ETA.

    Voyages are processed in ascending ``ETA_Planned`` order (ties follow the
    default pandas sort). A ship cannot arrive at a port before it has left
    its previous one, so a late departure pushes its next arrival back.
    Sailing time is not modelled separately; ``ETA_Planned`` is assumed to
    already include it.

    Args:
        voyages_df: Frame with columns ``Ship_Name``, ``Port_Name``,
            ``ETA_Planned`` (datetime) and ``Service_Time_Hours``.
        port_capacity: Mapping of port name to number of berths.

    Returns:
        tuple: ``(results_df, total_system_delay)``. ``results_df`` always has
        the columns ``Ship``, ``Port``, ``Planned_ETA``, ``Actual_RTA``,
        ``Actual_ETD`` and ``Delay_Hours``, even for an empty schedule.
        ``total_system_delay`` is the sum of the berth waiting times in hours.
    """
    result_columns = ['Ship', 'Port', 'Planned_ETA', 'Actual_RTA', 'Actual_ETD', 'Delay_Hours']

    # sort_values already returns a new frame, so the input is left untouched
    queue = voyages_df.sort_values('ETA_Planned')

    manager = PortManager(port_capacity)

    records = []
    total_system_delay = 0

    # Departure time of each ship from the last port it was processed at,
    # used to propagate delays to its next call
    ship_availability = {}

    schedule = zip(
        queue['Ship_Name'],
        queue['Port_Name'],
        queue['ETA_Planned'],
        queue['Service_Time_Hours'],
    )
    for ship, port, planned_eta, duration in schedule:
        prev_departure = ship_availability.get(ship)
        if prev_departure is None:
            current_arrival_candidate = planned_eta
        else:
            # Arrive as planned, or once released from the previous port
            current_arrival_candidate = max(planned_eta, prev_departure)

        # Ask the port for a berth
        rta, etd, wait = manager.request_berthing(port, current_arrival_candidate, duration)

        total_system_delay += wait
        ship_availability[ship] = etd  # the ship is only free again after ETD

        records.append({
            'Ship': ship,
            'Port': port,
            'Planned_ETA': planned_eta,
            'Actual_RTA': rta,
            'Actual_ETD': etd,
            'Delay_Hours': wait
        })

    # Explicit columns keep the schema stable when there are no voyages
    return pd.DataFrame(records, columns=result_columns), total_system_delay

# --- MAIN EXECUTION ---
if __name__ == "__main__":
    import os

    top_n = 20  # number of worst queues to show; the heading is derived from it
    baseline_path = 'results_file/baseline.csv'

    df, caps = load_data()
    print("Menjalankan Simulasi Baseline (First-Come-First-Served)...")
    res_df, total_delay = run_simulation(df, caps)

    print(f"Total Waiting Time: {total_delay:.2f} Jam")
    print(f"\nTop {top_n} Antrean Terparah:")
    print(res_df.sort_values('Delay_Hours', ascending=False).head(top_n)[['Ship', 'Port', 'Planned_ETA', 'Delay_Hours']])

    # Make sure the target folder exists so the export cannot fail on a fresh checkout
    os.makedirs(os.path.dirname(baseline_path), exist_ok=True)
    res_df.to_csv(baseline_path, index=False)

Menjalankan Simulasi Baseline (First-Come-First-Served)...
Total Waiting Time: 178.00 Jam

Top 30 Antrean Terparah:
                Ship           Port         Planned_ETA  Delay_Hours
65        KM LABOBAR          AMBON 2025-01-02 19:00:00         21.0
26          KM KELUD  TANJUNG PRIOK 2024-12-30 04:00:00         16.0
268  KM TILONGKABILA       MAKASSAR 2025-01-11 18:00:00         15.0
483       KM CIREMAI  TANJUNG PRIOK 2025-01-19 03:00:00         15.0
475         KM TIDAR  TANJUNG PRIOK 2025-01-18 21:00:00         13.0
440    KM TATAMAILAU          AMBON 2025-01-17 16:00:00          9.0
257  KM BK SIGUNTANG       MAKASSAR 2025-01-11 07:00:00          9.0
83   KM GUNUNG DEMPO  TANJUNG PRIOK 2025-01-04 04:00:00          7.0
465  KM GUNUNG DEMPO  TANJUNG PRIOK 2025-01-18 14:00:00          7.0
15        KM CIREMAI  TANJUNG PRIOK 2024-12-29 05:00:00          7.0
11   KM TILONGKABILA       MAKASSAR 2024-12-28 18:00:00          6.0
421      KM NGGAPULU       MAKASSAR 2025-01-17 02:00:00 

In [4]:
# ParallelCAOA
class ParallelCAOA:
    """Population-based search over per-voyage priority vectors.

    Every individual is a vector in ``[lb, ub] = [0, 1]`` with one entry per
    voyage row. Its fitness is the value returned by
    ``run_priority_simulation(voyages, port_caps, individual)``; lower is
    better (the log messages treat it as delay hours). Fitness evaluation of
    a whole population is spread over worker processes with joblib.
    """

    def __init__(self, data_file, port_file,
                 pop_size=50, max_iter=100,
                 alpha=0.3, beta=0.1, gamma=1.0, delta=1e-4, initial_energy=10.0,
                 n_jobs=-1, seed=None):  # n_jobs = -1 means use ALL CPU cores
        """Load the data once and store the search parameters.

        Args:
            data_file: Voyage CSV; must contain ``ETA_Planned``.
            port_file: Port CSV with ``Nama_Pelabuhan`` and ``Total_Berths``.
            pop_size: Number of individuals.
            max_iter: Number of iterations of the main loop.
            alpha: Weight of the pull towards the global best position.
            beta: Weight of the uniform random perturbation.
            gamma: Energy spent per unit of distance moved.
            delta: Stored but currently not used by ``optimize``.
            initial_energy: Energy given to a new or respawned individual.
            n_jobs: Passed to ``joblib.Parallel``.
            seed: Optional seed. When given, ``optimize`` draws all random
                numbers from ``np.random.default_rng(seed)`` so a run can be
                repeated; when ``None`` the global ``np.random`` state is used
                exactly as before.
        """
        # --- 1. Load data (only once, up front) ---
        self.voyages = pd.read_csv(data_file)
        self.voyages['ETA_Planned'] = pd.to_datetime(self.voyages['ETA_Planned'])

        ports = pd.read_csv(port_file)
        self.port_caps = dict(zip(ports['Nama_Pelabuhan'], ports['Total_Berths']))

        # --- CAOA parameters ---
        self.dim = len(self.voyages)
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.delta = delta
        self.init_energy = initial_energy
        self.n_jobs = n_jobs
        self.seed = seed

        self.lb = 0.0
        self.ub = 1.0

    def calculate_fitness_batch(self, population):
        """Evaluate the fitness of every row of ``population`` in parallel.

        Returns:
            numpy.ndarray: One fitness value per individual, in input order.
        """
        # joblib spreads the evaluations over the configured workers
        results = Parallel(n_jobs=self.n_jobs)(
            delayed(run_priority_simulation)(self.voyages, self.port_caps, ind)
            for ind in population
        )
        return np.array(results)

    def _update_global_best(self, population, fitness, best_pos, best_score, iteration):
        """Return the (possibly improved) global best and log a new record."""
        candidate = np.argmin(fitness)
        if fitness[candidate] < best_score:
            best_score = fitness[candidate]
            best_pos = population[candidate].copy()
            print(f"Iterasi {iteration+1}: REKOR BARU! Delay turun ke {best_score:.2f} Jam")
        return best_pos, best_score

    def optimize(self):
        """Run the search and return the best priority vector found.

        Returns:
            tuple: ``(gBestPos, gBestScore)``, the best position seen and its
            fitness.
        """
        # The rng is created here (not stored on self) so the instance stays picklable
        rng = np.random if self.seed is None else np.random.default_rng(self.seed)

        # Initialisation
        population = rng.uniform(self.lb, self.ub, (self.pop_size, self.dim))
        energies = np.full(self.pop_size, self.init_energy)

        print(f"Menghitung fitness awal menggunakan {multiprocessing.cpu_count()} CPU Cores...")
        fitness = self.calculate_fitness_batch(population)

        # Global best
        best_idx = np.argmin(fitness)
        gBestScore = fitness[best_idx]
        gBestPos = population[best_idx].copy()

        print(f"Start Optimization. Initial Best Delay: {gBestScore:.2f} Hours")

        # --- MAIN LOOP ---
        for t in range(self.max_iter):
            old_positions = population.copy()

            # --- 1. POSITION UPDATE (vectorised) ---
            # X_new = X + alpha*(Leader - X) + beta*(1 - 2*r)
            # gBestPos broadcasts across the rows, so no tiled copy is needed
            r = rng.random((self.pop_size, self.dim))
            movements = self.alpha * (gBestPos - population) + \
                        self.beta * (1.0 - 2.0 * r)
            new_population = np.clip(population + movements, self.lb, self.ub)

            # --- 2. PARALLEL FITNESS EVALUATION (the expensive step) ---
            new_fitness = self.calculate_fitness_batch(new_population)

            # --- 3. SELECTION (vectorised) ---
            # A move is kept only when it strictly improves the individual.
            # The delta threshold is not applied here.
            improved_mask = new_fitness < fitness
            population[improved_mask] = new_population[improved_mask]
            fitness[improved_mask] = new_fitness[improved_mask]

            gBestPos, gBestScore = self._update_global_best(
                population, fitness, gBestPos, gBestScore, t
            )

            # --- 4. ENERGY MECHANISM ---
            # Only individuals whose move was accepted have a non-zero distance
            distances = np.sqrt(np.sum((population - old_positions)**2, axis=1))
            energies = energies - (self.gamma * distances)

            # Respawn depleted individuals at random positions
            depleted_indices = np.where(energies <= 0)[0]
            if len(depleted_indices) > 0:
                population[depleted_indices] = rng.uniform(
                    self.lb, self.ub, (len(depleted_indices), self.dim)
                )
                energies[depleted_indices] = self.init_energy

                # Evaluate the respawned individuals right away
                fitness[depleted_indices] = self.calculate_fitness_batch(
                    population[depleted_indices]
                )

                # A respawn can beat the record; without this check such a hit
                # in the final iteration would never be reported or returned
                gBestPos, gBestScore = self._update_global_best(
                    population, fitness, gBestPos, gBestScore, t
                )

            # Logging
            if (t+1) % 10 == 0:
                print(f"Iterasi {t+1}/{self.max_iter} | Best: {gBestScore:.2f} h | Avg: {np.mean(fitness):.2f} h | Depleted: {len(depleted_indices)}")

        return gBestPos, gBestScore

if __name__ == "__main__":
    import os

    # Create the output folder before the long optimisation, so a missing
    # directory cannot discard the result at the very end of the run.
    output_dir = os.path.dirname(output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Settings for large data sets
    optimizer = ParallelCAOA(
        data_file=input, # replace with your own large data set
        port_file='port_data.csv',
        pop_size=100,      # a larger population is affordable thanks to the parallel evaluation
        max_iter=200,
        alpha=0.3, beta=0.2, gamma=0.1, initial_energy=50.0,
        n_jobs=-1          # -1 = use all CPU cores
    )

    best_prio, min_delay = optimizer.optimize()

    # Save the result: the voyage rows plus the best priority per row
    df_result = optimizer.voyages.copy()
    df_result['Optimized_Priority'] = best_prio
    df_result.to_csv(output, index=False)
    print("Selesai.")

Menghitung fitness awal menggunakan 12 CPU Cores...
Start Optimization. Initial Best Delay: 1532.00 Hours
Iterasi 1: REKOR BARU! Delay turun ke 1192.00 Jam
Iterasi 2: REKOR BARU! Delay turun ke 1007.00 Jam
Iterasi 3: REKOR BARU! Delay turun ke 955.00 Jam
Iterasi 4: REKOR BARU! Delay turun ke 912.00 Jam
Iterasi 5: REKOR BARU! Delay turun ke 908.00 Jam
Iterasi 6: REKOR BARU! Delay turun ke 879.00 Jam
Iterasi 7: REKOR BARU! Delay turun ke 849.00 Jam
Iterasi 9: REKOR BARU! Delay turun ke 846.00 Jam
Iterasi 10: REKOR BARU! Delay turun ke 705.00 Jam
Iterasi 10/200 | Best: 705.00 h | Avg: 940.06 h | Depleted: 0
Iterasi 20/200 | Best: 705.00 h | Avg: 876.12 h | Depleted: 0
Iterasi 30/200 | Best: 705.00 h | Avg: 864.60 h | Depleted: 0
Iterasi 40/200 | Best: 705.00 h | Avg: 857.04 h | Depleted: 0
Iterasi 50/200 | Best: 705.00 h | Avg: 847.29 h | Depleted: 0
Iterasi 60/200 | Best: 705.00 h | Avg: 843.47 h | Depleted: 0
Iterasi 70/200 | Best: 705.00 h | Avg: 839.97 h | Depleted: 0
Iterasi 80/200 |

In [5]:
def run_detailed_simulation(voyages_df, port_capacity, priority_vector=None, scenario_name="Simulation"):
    """Run the berth simulation and return one detailed row per voyage.

    Voyages are processed in order of a virtual queue time,
    ``ETA_Planned - CAOA_Priority * 24h``, so a higher priority moves a voyage
    forward in the processing order by up to 24 hours.

    NOTE(review): a ship's availability is updated in processing order, not in
    planned order. If priorities make a ship's later call be processed before
    an earlier one, the earlier call cannot start until the later one has
    finished, which produces very large delays. Confirm that this matches the
    objective used by the optimiser.

    Args:
        voyages_df: Frame with ``Ship_Name``, ``Port_Name``, ``ETA_Planned``
            and ``Service_Time_Hours``. It is not modified.
        port_capacity: Mapping of port name to number of berths. Ports missing
            from it get a single berth.
        priority_vector: Optional priorities, one per row of ``voyages_df`` in
            row order (applied positionally, whatever index it carries). When
            ``None``, the ``CAOA_Priority`` column is used if present,
            otherwise 0.5 for every voyage.
        scenario_name: Label written to the ``Scenario`` column.

    Returns:
        pandas.DataFrame: Columns ``Ship_Name``, ``Port_Name``,
        ``ETA_Planned``, ``Scenario``, ``Actual_Arrival``, ``Actual_Berth``,
        ``Delay_Hours`` (berthing vs planned ETA, floored at 0) and
        ``Waiting_Time_Hours`` (berthing vs physical arrival). The columns are
        present even when there are no voyages.
    """
    result_columns = [
        'Ship_Name', 'Port_Name', 'ETA_Planned', 'Scenario',
        'Actual_Arrival', 'Actual_Berth', 'Delay_Hours', 'Waiting_Time_Hours',
    ]
    if len(voyages_df) == 0:
        # Keep the schema so callers can still aggregate the metric columns
        return pd.DataFrame(columns=result_columns)

    sim_df = voyages_df.copy()

    # Make sure ETA_Planned is a datetime column
    if not pd.api.types.is_datetime64_any_dtype(sim_df['ETA_Planned']):
        sim_df['ETA_Planned'] = pd.to_datetime(sim_df['ETA_Planned'])

    if priority_vector is not None:
        # np.asarray drops any pandas index so the values are assigned by
        # position instead of being index-aligned (which can yield NaN)
        sim_df['CAOA_Priority'] = np.asarray(priority_vector, dtype=float)
    elif 'CAOA_Priority' not in sim_df.columns:
        # No priorities at all: a uniform 0.5 keeps the planned-ETA order (FCFS)
        sim_df['CAOA_Priority'] = 0.5

    # --- VIRTUAL QUEUE LOGIC ---
    max_time_shift_hours = 24.0
    time_shift = pd.to_timedelta(sim_df['CAOA_Priority'] * max_time_shift_hours, unit='h')
    sim_df['Queue_Time'] = sim_df['ETA_Planned'] - time_shift

    # Global processing order
    sim_df = sim_df.sort_values(by='Queue_Time')

    # State: next free time of every berth, and of every ship
    port_state = {p: [pd.Timestamp.min] * cap for p, cap in port_capacity.items()}
    ship_availability = {}

    results = []

    schedule = zip(
        sim_df['Ship_Name'],
        sim_df['Port_Name'],
        sim_df['ETA_Planned'],
        sim_df['Service_Time_Hours'],
    )
    for ship, port, planned_eta, duration in schedule:
        # Delay propagation: the ship must have finished its previous call
        prev_finish = ship_availability.get(ship, pd.Timestamp.min)
        physical_arrival = max(planned_eta, prev_finish)

        # Earliest-free berth of this port (unknown ports get one berth)
        berths = port_state.setdefault(port, [pd.Timestamp.min])
        berths.sort()
        free_berth_time = berths[0]

        # Berthing and completion
        berthing_time = max(physical_arrival, free_berth_time)
        finish_time = berthing_time + pd.to_timedelta(duration, unit='h')

        # Metrics
        delay_seconds = (berthing_time - planned_eta).total_seconds()
        delay_hours = max(0.0, delay_seconds / 3600.0)

        waiting_time_hours = (berthing_time - physical_arrival).total_seconds() / 3600.0

        # Update state
        berths[0] = finish_time
        ship_availability[ship] = finish_time

        results.append({
            'Ship_Name': ship,
            'Port_Name': port,
            'ETA_Planned': planned_eta,
            'Scenario': scenario_name,
            'Actual_Arrival': physical_arrival,
            'Actual_Berth': berthing_time,
            'Delay_Hours': delay_hours,
            'Waiting_Time_Hours': waiting_time_hours
        })

    return pd.DataFrame(results, columns=result_columns)

def generate_metrics_report(voyage_file, port_file, optimized_file):
    """Compare the FCFS baseline with the optimised priorities and print KPIs.

    Both scenarios are simulated with ``run_detailed_simulation``: the
    baseline with a uniform priority of 0.5, the optimised one with the
    ``Optimized_Priority`` values read from ``optimized_file``.

    Args:
        voyage_file: Voyage CSV (``Ship_Name``, ``Port_Name``,
            ``ETA_Planned``, ...).
        port_file: Port CSV with ``Nama_Pelabuhan`` and ``Total_Berths``.
        optimized_file: CSV holding the voyage key columns plus
            ``Optimized_Priority``.

    Returns:
        None. The report is printed.
    """
    print("=== MEMULAI PERHITUNGAN METRIK (REVISI) ===")

    # 1. Load data
    voyages = pd.read_csv(voyage_file)
    ports = pd.read_csv(port_file)
    caps = dict(zip(ports['Nama_Pelabuhan'], ports['Total_Berths']))

    voyages['ETA_Planned'] = pd.to_datetime(voyages['ETA_Planned'])

    # 2. Load the optimisation result and attach its priorities
    optimized_data = pd.read_csv(optimized_file)
    optimized_data['ETA_Planned'] = pd.to_datetime(optimized_data['ETA_Planned'])

    print("Menggabungkan data prioritas...")
    key_cols = ['Ship_Name', 'Port_Name', 'ETA_Planned']
    occurrence_col = '_key_occurrence'
    # (Ship, Port, ETA) is not guaranteed to be unique. Numbering repeated keys
    # on both sides makes the merge one-to-one, so a repeated key can never
    # multiply voyage rows and inflate the optimised totals.
    left = voyages.assign(
        **{occurrence_col: voyages.groupby(key_cols, dropna=False).cumcount()}
    )
    right = optimized_data[key_cols + ['Optimized_Priority']].assign(
        **{occurrence_col: optimized_data.groupby(key_cols, dropna=False).cumcount()}
    )
    merged_df = left.merge(
        right, on=key_cols + [occurrence_col], how='left'
    ).drop(columns=occurrence_col)

    # Voyages without a matching priority fall back to 0.5; report how many,
    # since a large number means the two files do not describe the same data
    unmatched = int(merged_df['Optimized_Priority'].isna().sum())
    if unmatched:
        print(f"PERINGATAN: {unmatched} voyage tanpa prioritas hasil optimasi, diisi 0.5")
    merged_df['Optimized_Priority'] = merged_df['Optimized_Priority'].fillna(0.5)

    # 3. Baseline (FCFS): original data with a uniform priority
    print("1. Menghitung Baseline (FCFS)...")
    df_base_input = voyages.copy()
    df_base_input['CAOA_Priority'] = 0.5
    df_baseline = run_detailed_simulation(df_base_input, caps, None, "Baseline")

    # 4. Optimised (CAOA): rename the column to the name the simulation reads
    print("2. Menghitung Optimized (CAOA)...")
    df_opt_input = merged_df.rename(columns={'Optimized_Priority': 'CAOA_Priority'})
    df_optimized = run_detailed_simulation(df_opt_input, caps, None, "Optimized")

    # 5. Aggregate metrics
    total_delay_base = df_baseline['Delay_Hours'].sum()
    total_delay_opt = df_optimized['Delay_Hours'].sum()

    total_wait_base = df_baseline['Waiting_Time_Hours'].sum()
    total_wait_opt = df_optimized['Waiting_Time_Hours'].sum()

    time_saved = total_delay_base - total_delay_opt
    efficiency_gain = (time_saved / total_delay_base) * 100 if total_delay_base > 0 else 0

    # 6. Print report
    print("\n" + "="*40)
    print("   LAPORAN KINERJA OPTIMASI (KPI)")
    print("="*40)
    print(f"{'Metrik':<25} | {'Baseline':<10} | {'Optimized':<10} | {'Delta':<15}")
    print("-" * 70)
    print(f"{'Total Delay (Jam)':<25} | {total_delay_base:<10.2f} | {total_delay_opt:<10.2f} | {time_saved:<10.2f}")
    # Single space before the separator so this row lines up with the others
    print(f"{'Total Wait (Jam)':<25} | {total_wait_base:<10.2f} | {total_wait_opt:<10.2f} | {(total_wait_base - total_wait_opt):<10.2f}")
    print("-" * 70)
    print(f"✅ EFISIENSI TOTAL: {efficiency_gain:.2f}%")
    print("="*40)

if __name__ == "__main__":
    # Compare the baseline and the optimised schedule for the configured files
    report_inputs = {
        'voyage_file': input,
        'port_file': 'port_data.csv',
        'optimized_file': output,
    }
    generate_metrics_report(**report_inputs)

=== MEMULAI PERHITUNGAN METRIK (REVISI) ===
Menggabungkan data prioritas...
1. Menghitung Baseline (FCFS)...
2. Menghitung Optimized (CAOA)...

   LAPORAN KINERJA OPTIMASI (KPI)
Metrik                    | Baseline   | Optimized  | Delta          
----------------------------------------------------------------------
Total Delay (Jam)         | 583.00     | 14670.00   | -14087.00 
Total Wait (Jam)           | 178.00     | 2716.00    | -2538.00  
----------------------------------------------------------------------
✅ EFISIENSI TOTAL: -2416.30%
