In [1]:
import pandas as pd
import numpy as np
from datetime import datetime


In [None]:
def get_unique_ports_pipeline(file_path):
    """Read a schedule CSV and list the distinct port names it contains.

    Port names come from the ``PELABUHAN`` column. Each value is trimmed of
    surrounding whitespace, then the set of names is de-duplicated and sorted.
    Missing or blank cells are ignored, so they are not reported as a port
    literally called ``"nan"`` (or as an empty name).

    Args:
        file_path: Path of a CSV file that has a ``PELABUHAN`` column.

    Returns:
        tuple: ``(unique_ports, count)`` where ``unique_ports`` is the sorted
        array of port names and ``count`` is the number of names in it.

    Raises:
        KeyError: If the CSV has no ``PELABUHAN`` column.
    """
    print(f"--- Membaca file: {file_path} ---")

    df = pd.read_csv(file_path)

    # Drop missing cells *before* casting to str: astype(str) would otherwise
    # turn NaN into the text "nan" and count it as a real port.
    ports = df['PELABUHAN'].dropna().astype(str).str.strip()
    # Whitespace-only cells collapse to "" after stripping; they are not ports.
    ports = ports[ports != '']

    unique_ports = ports.unique()
    unique_ports.sort()

    count = len(unique_ports)

    print(f"Ditemukan {count} pelabuhan unik.")
    print("\nDaftar Pelabuhan:")
    for position, port in enumerate(unique_ports, start=1):
        print(f"{position}. {port}")

    return unique_ports, count

# Raw schedule CSV whose ports are listed below.
file_path = '/home/re1jie/TA-code/JSSP-CAOA-SSR/Data/testcase.csv'

# Unpack the returned (port names, number of ports) pair.
ports_list, total_ports = get_unique_ports_pipeline(file_path=file_path)

In [None]:
def preprocess_jssp_tardiness(file_path, output_path='/home/re1jie/TA-code/JSSP-CAOA-SSR/Utility/jssp_data.csv'):
    """Convert a ship-schedule CSV into a JSSP operation table and save it as CSV.

    Every (``NAMA_KAPAL``, ``VOYAGE``) pair is one job and every port
    (``PELABUHAN``) is one machine. For each job, its port calls are ordered by
    ETA; every call except the last becomes one operation. The last call is
    only used as the arrival reference for the travel time of the operation
    before it. All times are written in hours relative to the earliest ETA in
    the file (t = 0).

    Output columns: ``Job_ID``, ``Job_Label``, ``Operation_Seq``,
    ``Machine_ID``, ``Port_Label``, ``Travel_Time``, ``Arrival_Time``,
    ``Proc_Time``, ``Due_Date``. If no job has at least two port calls, a CSV
    containing only this header is written.

    Args:
        file_path: Input CSV with the columns ``NAMA_KAPAL``, ``VOYAGE``,
            ``PELABUHAN``, ``ETA_TANGGAL``, ``ETA_JAM``, ``ETD_TANGGAL`` and
            ``ETD_JAM``. Dates are parsed as ``%d-%b-%y`` (Indonesian month
            abbreviations are translated first) and times as ``%H:%M``.
        output_path: Destination CSV path.

    Returns:
        None. The result is written to ``output_path``.

    Raises:
        KeyError: If one of the required columns is missing from the input.
    """
    # Declared up front so an empty result still gets a well-formed header
    # (and the final sort does not fail on missing columns).
    output_columns = [
        'Job_ID', 'Job_Label', 'Operation_Seq', 'Machine_ID', 'Port_Label',
        'Travel_Time', 'Arrival_Time', 'Proc_Time', 'Due_Date',
    ]

    # 1. Load & parse
    print(f"Membaca file dari: {file_path}")
    df = pd.read_csv(file_path, dtype={'VOYAGE': str})

    # Indonesian month spellings -> the English abbreviations expected by %b.
    # NOTE(review): %b follows the process locale; this assumes an English/C locale.
    ID_TO_EN = {'Des': 'Dec', 'Mei': 'May', 'Agt': 'Aug', 'Agu': 'Aug',
                'Okt': 'Oct', 'Juli': 'Jul', 'Juni': 'Jun'}

    def parse_dt(d, t):
        """Combine a date cell and a time cell into a datetime, or NaT if unusable."""
        if pd.isna(d) or pd.isna(t):
            return pd.NaT
        d_str = str(d).strip()
        for id_m, en_m in ID_TO_EN.items():
            d_str = d_str.replace(id_m, en_m)
        try:
            return datetime.strptime(f"{d_str} {str(t).strip()}", "%d-%b-%y %H:%M")
        except ValueError:
            # Only a malformed date/time is expected here; anything else should surface.
            return pd.NaT

    # A list comprehension (instead of df.apply(axis=1)) also works on an empty
    # frame, where apply would hand back a DataFrame rather than a column.
    df['ETA_FULL'] = pd.to_datetime(
        [parse_dt(d, t) for d, t in zip(df['ETA_TANGGAL'], df['ETA_JAM'])]
    )
    df['ETD_FULL'] = pd.to_datetime(
        [parse_dt(d, t) for d, t in zip(df['ETD_TANGGAL'], df['ETD_JAM'])]
    )
    # Rows without a usable ETA cannot be placed on the timeline. The stable
    # sort keeps file order for equal ETAs, so job numbering is reproducible.
    df = df.dropna(subset=['ETA_FULL']).sort_values('ETA_FULL', kind='mergesort')

    # 2. Mappings
    # Machines: ports numbered from 1 in alphabetical order.
    unique_ports = sorted(df['PELABUHAN'].unique())
    port_map = {p: i + 1 for i, p in enumerate(unique_ports)}
    df['Machine_ID'] = df['PELABUHAN'].map(port_map)

    # Jobs: numbered from 1 in order of first appearance on the ETA timeline.
    df['JOB_KEY'] = df['NAMA_KAPAL'] + "_" + df['VOYAGE']
    unique_jobs = df['JOB_KEY'].unique()
    job_map = {k: i + 1 for i, k in enumerate(unique_jobs)}

    # 3. Time reference (t = 0)
    global_start = df['ETA_FULL'].min()

    def hours_since_start(moment):
        """Hours elapsed between the global start and ``moment``."""
        return (moment - global_start).total_seconds() / 3600.0

    jssp_rows = []

    # 4. One pass per job
    for job_key, group in df.groupby('JOB_KEY'):
        records = group.sort_values('ETA_FULL', kind='mergesort').to_dict('records')

        # Pair each port call with the one after it. The final call (the
        # voyage's last destination) therefore never becomes an operation; its
        # ETA is only consumed as the arrival of the preceding leg.
        for seq, (row, next_row) in enumerate(zip(records, records[1:]), start=1):
            # Arrival (ready) time
            arr_rel = hours_since_start(row['ETA_FULL'])

            if pd.notna(row['ETD_FULL']):
                # Processing time = time spent in port; due date = scheduled departure.
                proc_time = (row['ETD_FULL'] - row['ETA_FULL']).total_seconds() / 3600.0
                due_date_rel = hours_since_start(row['ETD_FULL'])
                # Travel to the next port; next_row always has an ETA because
                # rows without one were dropped. Clamp at 0 for messy schedules.
                travel_time = max(
                    0.0,
                    (next_row['ETA_FULL'] - row['ETD_FULL']).total_seconds() / 3600.0,
                )
            else:
                # Fallback when ETD is missing mid-voyage.
                proc_time = 0.0
                due_date_rel = arr_rel
                travel_time = 0.0

            jssp_rows.append({
                'Job_ID': job_map[job_key],
                'Job_Label': job_key,
                'Operation_Seq': seq,
                'Machine_ID': row['Machine_ID'],
                'Port_Label': row['PELABUHAN'],
                'Travel_Time': round(travel_time, 2),  # travel to the next (possibly final) port
                'Arrival_Time': round(arr_rel, 2),
                'Proc_Time': max(0.0, round(proc_time, 2)),
                'Due_Date': round(due_date_rel, 2),
            })

    final_df = pd.DataFrame(jssp_rows, columns=output_columns)
    final_df = final_df.sort_values(by=['Job_ID', 'Operation_Seq'])
    final_df.to_csv(output_path, index=False)
    print(f"Data tersimpan di {output_path}")

# Re-run the preprocessing on the test-case schedule (default output path).
preprocess_jssp_tardiness(file_path='/home/re1jie/TA-code/JSSP-CAOA-SSR/Data/testcase.csv')

Membaca file dari: /home/re1jie/TA-code/JSSP-CAOA-SSR/Data/testcase.csv
Data tersimpan di /home/re1jie/TA-code/JSSP-CAOA-SSR/Utility/jssp_data.csv
