In [72]:
import pandas as pd
from datetime import timedelta
import numpy as np
from caoa_solver import run_priority_simulation
from joblib import Parallel, delayed
import multiprocessing

In [None]:
# Test files: the monthly voyage CSV fed to the simulations and the CSV the
# optimizer writes its schedule to. Later cells read both names as globals.
# NOTE: `input` shadows the Python builtin of the same name in this notebook.
input, output = (
    'split_by_month/Voyage_Data_2025_07.csv',
    'results_file/current/optimized_schedule.csv',
)

In [74]:
# 1. Load Data
def load_data(voyage_file=None, port_file='port_data.csv'):
    """Load the voyage schedule and the per-port berth capacities.

    Args:
        voyage_file: Path of the voyage CSV. When omitted, the notebook-level
            global ``input`` is used, which is what the original no-argument
            call did.
        port_file: Path of the port CSV. It must provide the columns
            ``Nama_Pelabuhan`` and ``Total_Berths``.

    Returns:
        tuple: ``(voyages_df, port_capacity)`` where ``voyages_df`` is the
        voyage table with ``ETA_Planned`` parsed to datetimes and
        ``port_capacity`` maps each ``Nama_Pelabuhan`` to its ``Total_Berths``.

    Raises:
        ValueError: If no ``voyage_file`` is given and the notebook has not
            defined the global ``input``.
    """
    if voyage_file is None:
        # Look the name up in the module globals only: a bare `input` would
        # silently resolve to the Python builtin when the config cell has not
        # been run, and pandas would then fail with an unrelated message.
        voyage_file = globals().get('input')
        if voyage_file is None:
            raise ValueError(
                "No voyage file given: pass voyage_file or define the global 'input'."
            )

    ports_df = pd.read_csv(port_file)
    voyages_df = pd.read_csv(voyage_file)

    # Convert the date strings to datetime objects
    voyages_df['ETA_Planned'] = pd.to_datetime(voyages_df['ETA_Planned'])

    # Port capacity lookup, e.g. {'TANJUNG PRIOK': 3, 'SURABAYA': 2, ...}
    port_capacity = dict(zip(ports_df['Nama_Pelabuhan'], ports_df['Total_Berths']))

    return voyages_df, port_capacity

# 2. Kelas untuk Melacak Status Pelabuhan
class PortManager:
    """Tracks, for every port, the time at which each of its berths is free.

    ``self.berths`` maps a port name to a list with one entry per berth; each
    entry is the time from which that berth can accept the next ship.
    """

    def __init__(self, capacity_dict):
        """Create the berth timelines.

        Args:
            capacity_dict: Mapping of port name to number of berths. A value
                that is not a usable positive count (0, negative, NaN, None)
                is treated as a single berth, mirroring the fallback used for
                ports that are missing from the mapping altogether.
        """
        # Every berth starts out free "since forever".
        self.berths = {
            port: [pd.Timestamp.min] * self._berth_count(cap)
            for port, cap in capacity_dict.items()
        }

    @staticmethod
    def _berth_count(cap):
        """Return ``cap`` as an integer berth count, never less than one."""
        try:
            count = int(cap)
        except (TypeError, ValueError, OverflowError):
            # None / NaN / inf: no usable capacity information.
            return 1
        return max(count, 1)

    def request_berthing(self, port_name, arrival_time, service_duration):
        """Assign the earliest-free berth of ``port_name`` to an arriving ship.

        Args:
            port_name: Port the ship calls at. Unknown ports are registered on
                the fly with a single berth.
            arrival_time: Time the ship is ready to berth.
            service_duration: Time at berth in hours (Python or NumPy number).

        Returns:
            tuple: ``(actual_berthing_time, departure_time, waiting_time)``
            where ``waiting_time`` is the queueing time in hours between
            ``arrival_time`` and the actual berthing.
        """
        # If the port is not in the capacity data, assume one berth.
        available_slots = self.berths.setdefault(port_name, [pd.Timestamp.min])
        available_slots.sort()  # earliest-free berth first

        earliest_free_time = available_slots[0]

        # The ship berths at its arrival or when the berth frees up, whichever is later.
        actual_berthing_time = max(arrival_time, earliest_free_time)

        # Queueing delay in hours
        waiting_time = (actual_berthing_time - arrival_time).total_seconds() / 3600.0

        # float() so NumPy integer durations are accepted; datetime.timedelta
        # rejects numpy.int64 with a TypeError.
        departure_time = actual_berthing_time + timedelta(hours=float(service_duration))
        available_slots[0] = departure_time  # berth is occupied until the ship leaves

        return actual_berthing_time, departure_time, waiting_time

# 3. Fungsi Simulasi Utama (Decoder)
def run_simulation(voyages_df, port_capacity):
    """Simulate first-come-first-served berthing for a voyage schedule.

    Voyages are processed in order of ``ETA_Planned``; voyages with the same
    ETA keep their input row order. A ship cannot arrive at a port before it
    has left its previous one, so delays propagate along each ship's route.
    Sailing time is not modelled separately: ``ETA_Planned`` is assumed to
    already include the ideal sailing time.

    Args:
        voyages_df: DataFrame with the columns ``Ship_Name``, ``Port_Name``,
            ``ETA_Planned`` (datetime) and ``Service_Time_Hours``.
        port_capacity: Mapping of port name to number of berths, passed to
            ``PortManager``.

    Returns:
        tuple: ``(results_df, total_system_delay)``. ``results_df`` has one row
        per voyage with the columns ``Ship``, ``Port``, ``Planned_ETA``,
        ``Actual_RTA``, ``Actual_ETD`` and ``Delay_Hours``; these columns are
        present even when the schedule is empty. ``Delay_Hours`` is the berth
        queueing time measured from the ship's effective arrival, and
        ``total_system_delay`` is its sum.
    """
    result_columns = [
        'Ship', 'Port', 'Planned_ETA', 'Actual_RTA', 'Actual_ETD', 'Delay_Hours',
    ]

    # Chronological order is the FCFS rule. mergesort is stable, so ties on
    # ETA are resolved by input order rather than arbitrarily.
    queue = voyages_df.sort_values('ETA_Planned', kind='mergesort')

    manager = PortManager(port_capacity)

    results = []
    total_system_delay = 0

    # Departure time of each ship from its previous port call.
    ship_availability = {}

    voyage_rows = zip(
        queue['Ship_Name'],
        queue['Port_Name'],
        queue['ETA_Planned'],
        queue['Service_Time_Hours'],
    )
    for ship, port, planned_eta, duration in voyage_rows:
        # Delay propagation: arrive as planned, or once the ship has left its
        # previous port if that happened later.
        prev_departure = ship_availability.get(ship)
        if prev_departure is None:
            current_arrival_candidate = planned_eta
        else:
            current_arrival_candidate = max(planned_eta, prev_departure)

        # Request a berth at the port
        rta, etd, wait = manager.request_berthing(port, current_arrival_candidate, duration)

        total_system_delay += wait
        ship_availability[ship] = etd  # the ship is free again only after its ETD

        results.append({
            'Ship': ship,
            'Port': port,
            'Planned_ETA': planned_eta,
            'Actual_RTA': rta,
            'Actual_ETD': etd,
            'Delay_Hours': wait
        })

    # Explicit columns keep the frame usable (e.g. sortable) for an empty schedule.
    return pd.DataFrame(results, columns=result_columns), total_system_delay

# --- MAIN EXECUTION ---
if __name__ == "__main__":
    import os

    # One constant drives both the header and the number of rows shown, so the
    # two cannot drift apart (the header used to say 30 while 20 rows printed).
    top_n = 20
    baseline_path = 'results_file/current/baseline.csv'

    df, caps = load_data()
    print("Menjalankan Simulasi Baseline (First-Come-First-Served)...")
    res_df, total_delay = run_simulation(df, caps)

    print(f"Total Waiting Time: {total_delay:.2f} Jam")
    print(f"\nTop {top_n} Antrean Terparah:")
    print(res_df.sort_values('Delay_Hours', ascending=False).head(top_n)[['Ship', 'Port', 'Planned_ETA', 'Delay_Hours']])

    # Make sure the results folder exists before writing into it.
    os.makedirs(os.path.dirname(baseline_path), exist_ok=True)
    res_df.to_csv(baseline_path, index=False)

Menjalankan Simulasi Baseline (First-Come-First-Served)...
Total Waiting Time: 35011.00 Jam

Top 30 Antrean Terparah:
                Ship           Port         Planned_ETA  Delay_Hours
81        KM LABOBAR  TANJUNG PRIOK 2025-06-02 19:00:00       1411.0
106  KM GUNUNG DEMPO  TANJUNG PRIOK 2025-06-04 05:00:00       1395.0
141         KM WILIS       MAKASSAR 2025-06-06 00:00:00       1359.0
381       KM SIRIMAU       MAKASSAR 2025-06-16 13:00:00       1219.0
620      KM SANGIANG          AMBON 2025-06-26 15:00:00       1058.0
3       KM DOBONSOLO  TANJUNG PRIOK 2025-05-24 16:00:00       1032.0
9        KM KELIMUTU  TANJUNG PRIOK 2025-05-27 10:00:00       1004.0
8         KM CIREMAI  TANJUNG PRIOK 2025-05-26 20:00:00        989.0
23        KM BK RAYA  TANJUNG PRIOK 2025-05-29 17:00:00        974.0
19       KM NGGAPULU  TANJUNG PRIOK 2025-05-29 03:00:00        973.0
27          KM KELUD  TANJUNG PRIOK 2025-05-30 04:00:00        970.0
11        KM LAMBELU       MAKASSAR 2025-05-27 21:00:0

In [75]:
# ParallelCAOA
class ParallelCAOA:
    """Population-based CAOA search over voyage priorities, evaluated in parallel.

    Each individual is a vector with one priority in ``[0, 1]`` per voyage.
    Its fitness is whatever ``run_priority_simulation(voyages, port_caps,
    priorities)`` returns; lower is better.
    NOTE(review): the log messages treat that value as total delay in hours —
    confirm against ``caoa_solver``.
    """

    def __init__(self, data_file, port_file,
                 pop_size=50, max_iter=100,
                 alpha=0.3, beta=0.1, gamma=1.0, delta=1e-4, initial_energy=10.0,
                 n_jobs=-1, seed=None):
        """Load the data once and store the search parameters.

        Args:
            data_file: Voyage CSV; ``ETA_Planned`` is parsed to datetimes.
            port_file: Port CSV with ``Nama_Pelabuhan`` and ``Total_Berths``.
            pop_size: Number of individuals.
            max_iter: Number of iterations of the main loop.
            alpha: Weight of the pull towards the global best position.
            beta: Weight of the uniform random perturbation.
            gamma: Energy consumed per unit of distance moved.
            delta: Stored but currently not used by ``optimize``.
            initial_energy: Energy of a fresh or respawned individual.
            n_jobs: Passed to ``joblib.Parallel`` (``-1`` = all CPU cores).
            seed: Optional integer seed. ``None`` (default) draws from
                NumPy's global random state, as before; an integer makes
                ``optimize`` reproducible.
        """
        # --- 1. Load data (only once, up front) ---
        self.voyages = pd.read_csv(data_file)
        self.voyages['ETA_Planned'] = pd.to_datetime(self.voyages['ETA_Planned'])

        ports = pd.read_csv(port_file)
        self.port_caps = dict(zip(ports['Nama_Pelabuhan'], ports['Total_Berths']))

        # --- CAOA parameters ---
        self.dim = len(self.voyages)
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.delta = delta
        self.init_energy = initial_energy
        self.n_jobs = n_jobs
        self.seed = seed

        # Search-space bounds for every priority value
        self.lb = 0.0
        self.ub = 1.0

    def calculate_fitness_batch(self, population):
        """Evaluate every row of ``population`` in parallel.

        Args:
            population: 2-D array, one priority vector per row.

        Returns:
            numpy.ndarray: One fitness value per row, in row order.
        """
        # joblib spreads the simulations over the configured workers
        results = Parallel(n_jobs=self.n_jobs)(
            delayed(run_priority_simulation)(self.voyages, self.port_caps, ind)
            for ind in population
        )
        return np.array(results)

    def _update_global_best(self, population, fitness, best_pos, best_score, iteration):
        """Return the (position, score) of the best solution seen so far.

        Prints a record message when the current population beats ``best_score``.
        """
        current_best_idx = np.argmin(fitness)
        if fitness[current_best_idx] < best_score:
            best_score = fitness[current_best_idx]
            best_pos = population[current_best_idx].copy()
            print(f"Iterasi {iteration}: REKOR BARU! Delay turun ke {best_score:.2f} Jam")
        return best_pos, best_score

    def optimize(self):
        """Run the search.

        Returns:
            tuple: ``(gBestPos, gBestScore)`` — the best priority vector found
            and its fitness.
        """
        # getattr keeps instances created without __init__ working.
        seed = getattr(self, 'seed', None)
        # Without a seed keep using NumPy's global stream (original behaviour).
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Initialisation; individual 0 is the all-0.5 priority vector.
        population = rng.uniform(self.lb, self.ub, (self.pop_size, self.dim))
        population[0] = 0.5
        energies = np.full(self.pop_size, self.init_energy)

        print(f"Menghitung fitness awal menggunakan {multiprocessing.cpu_count()} CPU Cores...")
        fitness = self.calculate_fitness_batch(population)

        # Global best
        best_idx = np.argmin(fitness)
        gBestScore = fitness[best_idx]
        gBestPos = population[best_idx].copy()

        print(f"Start Optimization. Initial Best Delay: {gBestScore:.2f} Hours")

        # --- MAIN LOOP ---
        for t in range(self.max_iter):
            old_positions = population.copy()

            # --- 1. Position update, vectorised over the whole population ---
            # X_new = X + alpha * (Leader - X) + beta * (1 - 2r); the leader
            # row broadcasts against every individual.
            r = rng.random_sample((self.pop_size, self.dim))
            movements = self.alpha * (gBestPos - population) + \
                        self.beta * (1.0 - 2.0 * r)
            new_population = np.clip(population + movements, self.lb, self.ub)

            # --- 2. Parallel fitness evaluation (the expensive step) ---
            new_fitness = self.calculate_fitness_batch(new_population)

            # --- 3. Greedy selection: keep a move only if it improves ---
            improved_mask = new_fitness < fitness
            population[improved_mask] = new_population[improved_mask]
            fitness[improved_mask] = new_fitness[improved_mask]

            # Record the best BEFORE any respawn can overwrite that individual.
            gBestPos, gBestScore = self._update_global_best(
                population, fitness, gBestPos, gBestScore, t + 1
            )

            # --- 4. Energy mechanism: moving costs energy ---
            distances = np.sqrt(np.sum((population - old_positions)**2, axis=1))
            energies = energies - (self.gamma * distances)

            depleted_indices = np.where(energies <= 0)[0]
            if len(depleted_indices) > 0:
                # Respawn depleted individuals at random positions
                population[depleted_indices] = rng.uniform(
                    self.lb, self.ub, (len(depleted_indices), self.dim)
                )
                energies[depleted_indices] = self.init_energy
                fitness[depleted_indices] = self.calculate_fitness_batch(
                    population[depleted_indices]
                )

                # A respawn can itself be a new best; credit it now so it is
                # not lost in the last iteration and the next step uses it
                # as leader.
                gBestPos, gBestScore = self._update_global_best(
                    population, fitness, gBestPos, gBestScore, t + 1
                )

            # Logging
            if (t+1) % 10 == 0:
                print(f"Iterasi {t+1}/{self.max_iter} | Best: {gBestScore:.2f} h | Avg: {np.mean(fitness):.2f} h | Depleted: {len(depleted_indices)}")

        return gBestPos, gBestScore

if __name__ == "__main__":
    import os

    # Create the output folder before the long optimisation run, so a missing
    # directory cannot discard the result at the very end.
    output_dir = os.path.dirname(output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Settings for large data sets
    optimizer = ParallelCAOA(
        data_file=input,   # replace with your large data file
        port_file='port_data.csv',
        pop_size=100,      # a larger population is affordable with parallel evaluation
        max_iter=200,
        alpha=0.3, beta=0.2, gamma=0.1, initial_energy=50.0,
        n_jobs=-1          # -1 = use every CPU core
    )

    best_prio, min_delay = optimizer.optimize()

    # Save the voyages together with their optimised priority
    df_result = optimizer.voyages.copy()
    df_result['Optimized_Priority'] = best_prio
    df_result.to_csv(output, index=False)
    print("Selesai.")

Menghitung fitness awal menggunakan 12 CPU Cores...
Start Optimization. Initial Best Delay: 765067.00 Hours
Iterasi 1: REKOR BARU! Delay turun ke 759950.00 Jam
Iterasi 3: REKOR BARU! Delay turun ke 758857.00 Jam
Iterasi 4: REKOR BARU! Delay turun ke 757098.00 Jam
Iterasi 6: REKOR BARU! Delay turun ke 756162.00 Jam
Iterasi 9: REKOR BARU! Delay turun ke 756078.00 Jam
Iterasi 10: REKOR BARU! Delay turun ke 755844.00 Jam
Iterasi 10/200 | Best: 755844.00 h | Avg: 758957.78 h | Depleted: 0
Iterasi 12: REKOR BARU! Delay turun ke 755426.00 Jam
Iterasi 17: REKOR BARU! Delay turun ke 754748.00 Jam
Iterasi 20/200 | Best: 754748.00 h | Avg: 756284.59 h | Depleted: 0
Iterasi 22: REKOR BARU! Delay turun ke 754335.00 Jam
Iterasi 25: REKOR BARU! Delay turun ke 754292.00 Jam
Iterasi 26: REKOR BARU! Delay turun ke 754173.00 Jam
Iterasi 30/200 | Best: 754173.00 h | Avg: 755559.63 h | Depleted: 0
Iterasi 32: REKOR BARU! Delay turun ke 754138.00 Jam
Iterasi 35: REKOR BARU! Delay turun ke 754114.00 Jam
Iter

In [76]:
import pandas as pd
import numpy as np
# Pastikan file caoa_solver.py ada di folder yang sama
from caoa_solver import run_priority_simulation 

def generate_metrics_report(voyage_file, port_file, optimized_file,
                            output_detail='results_file/current/final_detailed_report.csv'):
    """Compare the all-0.5-priority baseline with the optimised priorities.

    The optimised priorities are attached to the voyages by row position, so
    both files must have the same number of rows in the same order. Both
    scenarios are simulated with ``run_priority_simulation`` and a summary is
    printed; the detailed optimised result is written to ``output_detail``.

    Args:
        voyage_file: Voyage CSV that was given to the optimizer.
        port_file: Port CSV with ``Nama_Pelabuhan`` and ``Total_Berths``.
        optimized_file: Optimizer output CSV with an ``Optimized_Priority`` column.
        output_detail: Destination of the detailed optimised schedule.

    Returns:
        None. Validation problems (missing file, row-count mismatch, missing
        priority column) are reported with a printed message and an early return.
    """
    import os

    print("=== MEMULAI PERHITUNGAN METRIK (INDEX MERGE) ===")

    # 1. Load data
    try:
        voyages = pd.read_csv(voyage_file)
        ports = pd.read_csv(port_file)
        optimized_data = pd.read_csv(optimized_file)
    except FileNotFoundError as e:
        print(f"❌ ERROR: File tidak ditemukan - {e}")
        return

    # 2. Row counts must match, otherwise a positional merge is meaningless
    if len(voyages) != len(optimized_data):
        print("❌ FATAL ERROR: Jumlah baris tidak sama!")
        print(f"   Input: {len(voyages)} vs Output: {len(optimized_data)}")
        print("   Tidak bisa melakukan merge by index karena struktur data berubah.")
        return
    print(f"✅ Validasi Sukses: Kedua file memiliki {len(voyages)} baris.")

    # Report a missing priority column like the other validation failures
    # instead of letting a raw KeyError escape.
    if 'Optimized_Priority' not in optimized_data.columns:
        print("❌ ERROR: Kolom 'Optimized_Priority' tidak ditemukan di file hasil optimasi.")
        return

    # 3. Positional merge: row i of the input is row i of the output
    print("Menggabungkan data prioritas berdasarkan index...")

    merged_df = voyages.copy()

    # The simulation needs real datetimes
    merged_df['ETA_Planned'] = pd.to_datetime(merged_df['ETA_Planned'])

    # to_numpy() makes the assignment purely positional (no index alignment)
    merged_df['Optimized_Priority'] = optimized_data['Optimized_Priority'].to_numpy()

    # Missing priorities fall back to the neutral 0.5
    merged_df['Optimized_Priority'] = merged_df['Optimized_Priority'].fillna(0.5)

    # Port capacity lookup
    caps = dict(zip(ports['Nama_Pelabuhan'], ports['Total_Berths']))

    # 4. Run both scenarios through the solver

    # A. Baseline: every voyage gets priority 0.5
    print("1. Menghitung Baseline (FCFS)...")
    baseline_prio = [0.5] * len(merged_df)
    df_baseline = run_priority_simulation(merged_df, caps, baseline_prio, return_detailed=True)

    # B. Optimised priorities
    print("2. Menghitung Optimized (CAOA)...")
    opt_prio = merged_df['Optimized_Priority'].values
    df_optimized = run_priority_simulation(merged_df, caps, opt_prio, return_detailed=True)

    # 5. Statistics
    total_delay_base = df_baseline['Delay_Hours'].sum()
    total_delay_opt = df_optimized['Delay_Hours'].sum()

    time_saved = total_delay_base - total_delay_opt
    efficiency_gain = (time_saved / total_delay_base) * 100 if total_delay_base > 0 else 0

    # Wide enough for the longest label ('Total Delay Optimized', 21 chars);
    # the previous width of 20 pushed that row out of alignment.
    label_w = 22

    print("\n" + "="*45)
    print(f"{'METRIK':<{label_w}} | {'NILAI':<20}")
    print("-" * 45)
    print(f"{'Total Delay Baseline':<{label_w}} : {total_delay_base:,.2f} Jam")
    print(f"{'Total Delay Optimized':<{label_w}} : {total_delay_opt:,.2f} Jam")
    print("-" * 45)
    print(f"{'Time Saved':<{label_w}} : {time_saved:,.2f} Jam")
    print(f"{'Efficiency Gain':<{label_w}} : {efficiency_gain:.2f}%")
    print("="*45)

    # Save the detailed result, creating the target folder if needed
    detail_dir = os.path.dirname(output_detail)
    if detail_dir:
        os.makedirs(detail_dir, exist_ok=True)
    df_optimized.to_csv(output_detail, index=False)
    print(f"Laporan detail disimpan ke: {output_detail}")

# --- EKSEKUSI LANGSUNG ---
if __name__ == "__main__":
    # Uses the notebook globals 'input' and 'output' from the config cell.
    # They are looked up in globals() on purpose: a bare `input` always
    # resolves (to the Python builtin), so a NameError guard can never detect
    # that it is undefined, and wrapping the report call in that guard would
    # mislabel unrelated NameErrors raised inside it.
    input_file = globals().get('input')
    output_file = globals().get('output')
    port_file = 'port_data.csv'

    if input_file is None or output_file is None:
        print("⚠️ Variabel 'input' atau 'output' belum didefinisikan.")
        # For a manual test without the config cell, call for example:
        # generate_metrics_report('split_by_month/Voyage_Data_2025_01.csv',
        #                         'port_data.csv',
        #                         'results_file/current/optimized_schedule.csv')
    else:
        generate_metrics_report(input_file, port_file, output_file)

=== MEMULAI PERHITUNGAN METRIK (INDEX MERGE) ===
✅ Validasi Sukses: Kedua file memiliki 733 baris.
Menggabungkan data prioritas berdasarkan index...
1. Menghitung Baseline (FCFS)...
2. Menghitung Optimized (CAOA)...

METRIK               | NILAI               
---------------------------------------------
Total Delay Baseline : 776,477.00 Jam
Total Delay Optimized : 752,837.00 Jam
---------------------------------------------
Time Saved           : 23,640.00 Jam
Efficiency Gain      : 3.04%
Laporan detail disimpan ke: results_file/current/final_detailed_report.csv
