In [1]:
import pandas as pd
import numpy as np
from datetime import datetime


In [None]:
def get_unique_ports_pipeline(file_path):
    """List the distinct port names found in a schedule CSV.

    Reads the ``PELABUHAN`` column of ``file_path``, strips surrounding
    whitespace from every name, removes duplicates and prints a numbered,
    alphabetically sorted listing.

    Missing cells and cells that are empty after stripping are ignored, so
    they are never reported as a port (casting a missing value to ``str``
    would otherwise produce the literal name ``"nan"``).

    Args:
        file_path: Path to a CSV file that contains a ``PELABUHAN`` column.

    Returns:
        tuple: ``(unique_ports, count)`` where ``unique_ports`` is a sorted
        NumPy object array of port names and ``count`` is its length.

    Raises:
        KeyError: If the CSV has no ``PELABUHAN`` column.
    """
    print(f"--- Membaca file: {file_path} ---")

    df = pd.read_csv(file_path)

    # Drop missing values *before* the str cast; afterwards NaN is
    # indistinguishable from a port that is literally called "nan".
    port_names = df['PELABUHAN'].dropna().astype(str).str.strip()
    port_names = port_names[port_names != ""]

    # Build the array explicitly so the return type stays a NumPy object
    # array regardless of which string dtype pandas picked for the column.
    unique_ports = np.array(sorted(set(port_names)), dtype=object)
    count = len(unique_ports)

    print(f"Ditemukan {count} pelabuhan unik.")
    print("\nDaftar Pelabuhan:")
    for number, port in enumerate(unique_ports, start=1):
        print(f"{number}. {port}")

    return unique_ports, count

# NOTE(review): absolute path that only exists on one machine; anyone else
# running this notebook has to edit it before this cell can succeed.
file_path = '/home/re1jie/TA-code/JSSP-CAOA-SSR/Data/testcase.csv'

# Prints the numbered port listing and stores the sorted port names and their
# count in module-level variables.
ports_list, total_ports = get_unique_ports_pipeline(file_path)

--- Membaca file: testcase.csv ---
Ditemukan 73 pelabuhan unik.

Daftar Pelabuhan:
1. Agats
2. Ambon
3. Awerange
4. Bacan
5. Balikpapan
6. Banda
7. Banggai
8. Batam
9. Batulicin
10. Bau-Bau
11. Belawan
12. Benoa
13. Biak
14. Bima
15. Bitung
16. Blinyu
17. Bontang
18. Cirebon
19. Dobo
20. Ende
21. Fak-Fak
22. Geser
23. Gorontalo
24. Jayapura
25. Kaimana
26. Kalabahi
27. Karimun Jawa
28. Kendari
29. Kijang
30. Kumai
31. Kupang
32. Labuan Bajo
33. Larantuka
34. Lembar
35. Letung
36. Lewoleba
37. Luwuk
38. Makassar
39. Manokwari
40. Maumere
41. Merauke
42. Midai
43. Nabire
44. Namlea
45. Namrole
46. Natuna
47. Nunukan
48. Pantoloan
49. Pare-Pare
50. Pontianak
51. Raha
52. Rote
53. Sampit
54. Sanana
55. Saumlaki
56. Semarang
57. Serasan
58. Serui
59. Sorong
60. Surabaya
61. Tarakan
62. Tarempa
63. Ternate
64. Tg. Balai Karimun
65. Tg. Pandan
66. Tg. Priok
67. Tidore
68. Timika
69. Tual
70. Waikelo
71. Waingapu
72. Wanci
73. Waren


In [None]:
def _strip_if_text(value):
    """Return ``value`` without surrounding whitespace if it is a string, else unchanged."""
    return value.strip() if isinstance(value, str) else value


def _combine_datetime(dates, times):
    """Parse paired date and time columns into one datetime Series (NaT where either part is missing)."""
    has_both = dates.notna() & times.notna()
    text = dates.astype(str) + " " + times.astype(str)
    # Mask first so that the "nan" strings produced by the cast are never parsed.
    return pd.to_datetime(text.where(has_both), format="%d-%b-%y %H:%M")


def _hours_between(end, start):
    """Return ``end - start`` expressed in fractional hours."""
    return (end - start).dt.total_seconds() / 3600.0


def preprocess_jssp_tardiness(file_path, output_path='jssp_tardiness_data.csv'):
    """Convert a ship schedule CSV into a job-shop table with due dates.

    Every vessel/voyage pair (``NAMA_KAPAL`` + ``"_"`` + ``VOYAGE``) becomes a
    job, every port (``PELABUHAN``) becomes a machine, and every port call
    becomes one operation of its job, ordered by arrival time. All times are
    written as hours relative to the earliest arrival among the rows that are
    kept, rounded to two decimals.

    Row handling:
        * Rows whose ETA date or time is missing are dropped.
        * Rows without a port, vessel name or voyage are dropped and the
          number of such rows is printed. Dropping them before the IDs are
          assigned keeps ``Job_ID`` and ``Machine_ID`` contiguous.
        * Port names are stripped of surrounding whitespace so one port maps
          to exactly one ``Machine_ID``.
        * Rows without an ETD get ``Proc_Time`` 0 and a ``Due_Date`` equal to
          their arrival; otherwise ``Proc_Time`` is ETD - ETA (clamped at 0)
          and ``Due_Date`` is the ETD.

    Args:
        file_path: Input CSV with the columns ``NAMA_KAPAL``, ``VOYAGE``,
            ``PELABUHAN``, ``ETA_TANGGAL``, ``ETA_JAM``, ``ETD_TANGGAL`` and
            ``ETD_JAM``. Dates and times must match ``%d-%b-%y`` / ``%H:%M``.
        output_path: Destination CSV. It is always written, with a header
            only when no usable rows remain.

    Returns:
        None. The result is written to ``output_path``.

    Raises:
        KeyError: If a required column is absent from the input.
        ValueError: If a date/time value does not match the expected format.
    """
    output_columns = [
        'Job_ID', 'Job_Label', 'Operation_Seq', 'Machine_ID',
        'Port_Label', 'Arrival_Time', 'Proc_Time', 'Due_Date',
    ]
    key_columns = ['PELABUHAN', 'NAMA_KAPAL', 'VOYAGE']

    # 1. Load & parse. VOYAGE is read as text so codes such as "01" keep
    #    their leading zero.
    raw = pd.read_csv(file_path, dtype={'VOYAGE': str})
    visits = raw.assign(
        PELABUHAN=raw['PELABUHAN'].map(_strip_if_text),
        ETA_FULL=_combine_datetime(raw['ETA_TANGGAL'], raw['ETA_JAM']),
        ETD_FULL=_combine_datetime(raw['ETD_TANGGAL'], raw['ETD_JAM']),
    ).dropna(subset=['ETA_FULL'])

    # Rows that cannot be tied to a job or a machine are reported instead of
    # vanishing later inside groupby (which skips NaN keys).
    incomplete = visits[key_columns].isna().any(axis=1)
    if incomplete.any():
        print(f"Peringatan: {int(incomplete.sum())} baris tanpa PELABUHAN/NAMA_KAPAL/VOYAGE dilewati.")
        visits = visits[~incomplete]

    if visits.empty:
        # Still emit a well-formed file so downstream readers see the schema.
        pd.DataFrame(columns=output_columns).to_csv(output_path, index=False)
        print(f"Data tersimpan di {output_path}")
        return

    # A stable sort keeps the file order for calls that share the same ETA,
    # which makes Operation_Seq deterministic.
    visits = visits.sort_values('ETA_FULL', kind='stable')

    # 2. Mappings: machines are numbered alphabetically, jobs by first arrival.
    port_map = {
        port: number
        for number, port in enumerate(sorted(visits['PELABUHAN'].unique()), start=1)
    }
    job_key = visits['NAMA_KAPAL'] + "_" + visits['VOYAGE']
    job_map = {key: number for number, key in enumerate(job_key.unique(), start=1)}

    # 3. Time reference (t=0) is the earliest arrival among the kept rows.
    global_start = visits['ETA_FULL'].min()
    has_etd = visits['ETD_FULL'].notna()

    arrival = _hours_between(visits['ETA_FULL'], global_start)
    proc_time = _hours_between(visits['ETD_FULL'], visits['ETA_FULL']).where(has_etd, 0.0)
    due_date = _hours_between(visits['ETD_FULL'], global_start).where(has_etd, arrival)

    final_df = pd.DataFrame(
        {
            'Job_ID': job_key.map(job_map),
            'Job_Label': job_key,
            # Position of the call within its job, in arrival order.
            'Operation_Seq': visits.groupby(job_key).cumcount() + 1,
            'Machine_ID': visits['PELABUHAN'].map(port_map),
            'Port_Label': visits['PELABUHAN'],
            'Arrival_Time': arrival.round(2),
            # An ETD earlier than the ETA would give a negative duration.
            'Proc_Time': proc_time.round(2).clip(lower=0.0),
            'Due_Date': due_date.round(2),
        },
        columns=output_columns,
    )
    final_df = final_df.sort_values(by=['Job_ID', 'Operation_Seq'])
    final_df.to_csv(output_path, index=False)
    print(f"Data tersimpan di {output_path}")

# Runs the conversion with the default output_path, so the result is written
# to 'jssp_tardiness_data.csv' relative to the current working directory.
# NOTE(review): the input is an absolute, machine-specific path.
preprocess_jssp_tardiness('/home/re1jie/TA-code/JSSP-CAOA-SSR/Data/testcase.csv')

Data tersimpan di jssp_tardiness_data.csv dengan kolom Due_Date.
