In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
import re

In [10]:
# Path of the raw CSV that the cells below read.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere unless it is edited.
DATA_INPUT = '/home/re1jie/TA-code/JSSP-CAOA-SSR/Data/raw_data.csv'

## List pelabuhan unik

In [11]:
def get_unique_ports_pipeline(file_path):
    """Print and return the sorted unique port names found in a CSV file.

    Reads the ``PELABUHAN`` column, ignores missing cells, strips surrounding
    whitespace and de-duplicates the remaining names.

    Args:
        file_path: Path of a CSV file that contains a ``PELABUHAN`` column.

    Returns:
        A tuple ``(unique_ports, count)`` where ``unique_ports`` is the
        alphabetically sorted list of port names and ``count`` is its length.

    Raises:
        KeyError: If the CSV has no ``PELABUHAN`` column.
    """
    print(f"--- Membaca file: {file_path} ---")

    df = pd.read_csv(file_path)

    # Drop missing cells *before* casting to str; otherwise NaN becomes the
    # literal text "nan" and is reported as if it were a port.
    raw_ports = df['PELABUHAN'].dropna().astype(str).str.strip()
    # A whitespace-only cell is empty after stripping and is not a port either.
    unique_ports = sorted(raw_ports[raw_ports != ''].unique())

    count = len(unique_ports)

    print(f"Ditemukan {count} pelabuhan unik.")
    print("\nDaftar Pelabuhan:")
    for i, port in enumerate(unique_ports):
        print(f"{i+1}. {port}")

    return unique_ports, count

# Print the unique ports of the raw data file and keep the list and its size.
ports_list, total_ports = get_unique_ports_pipeline(DATA_INPUT)

--- Membaca file: /home/re1jie/TA-code/JSSP-CAOA-SSR/Data/raw_data.csv ---
Ditemukan 76 pelabuhan unik.

Daftar Pelabuhan:
1. Agats
2. Ambon
3. Awerange
4. Bacan
5. Balikpapan
6. Banda
7. Banggai
8. Batam
9. Batulicin
10. Baubau
11. Belawan
12. Benoa
13. Biak
14. Bima
15. Bitung
16. Blinyu
17. Bontang
18. Cilegon
19. Cirebon
20. Dobo
21. Ende
22. Fakfak
23. Geser
24. Gorontalo
25. Jailolo
26. Jayapura
27. Kaimana
28. Kalabahi
29. Karimun Jawa
30. Kendari
31. Kijang
32. Kumai
33. Kupang
34. Labuan Bajo
35. Larantuka
36. Lembar
37. Letung
38. Lewoleba
39. Luwuk
40. Makassar
41. Manokwari
42. Maumere
43. Merauke
44. Midai
45. Nabire
46. Namlea
47. Namrole
48. Natuna
49. Nunukan
50. Pantoloan
51. Parepare
52. Pontianak
53. Raha
54. Rote
55. Sampit
56. Sanana
57. Saumlaki
58. Semarang
59. Serasan
60. Serui
61. Sorong
62. Surabaya
63. Tanjung Balai Karimun
64. Tanjung Pandan
65. Tanjung Priok
66. Tarakan
67. Tarempa
68. Ternate
69. Tidore
70. Timika
71. Tual
72. Waikelo
73. Waingapu
74. Wanc

In [13]:
def jssp_transform(file_path, output_path):
    """Convert a raw ship-schedule CSV into a job-shop (JSSP) operation table.

    Each (``NAMA_KAPAL``, ``VOYAGE``) pair is a job and each port call is an
    operation on the machine that represents the port. The last port call of a
    voyage is not emitted as an operation; its ETA is only used to compute the
    travel time of the operation before it. All times are expressed in hours
    relative to the earliest ETA in the file.

    Args:
        file_path: Input CSV with the columns ``NAMA_KAPAL``, ``VOYAGE``,
            ``PELABUHAN``, ``ETA_TANGGAL``, ``ETA_JAM``, ``ETD_TANGGAL`` and
            ``ETD_JAM``. Dates are parsed as ``%d-%b-%y`` after Indonesian
            month abbreviations are mapped to English, times as ``%H:%M``.
        output_path: Destination CSV. It always gets the columns ``Job_ID``,
            ``Job_Label``, ``Operation_Seq``, ``Machine_ID``, ``Port_Label``,
            ``Travel_Time``, ``Arrival_Time``, ``Proc_Time`` and ``Due_Date``,
            even when no operation could be produced.

    Returns:
        None. The result is written to ``output_path``.
    """
    # 1. Load & parse
    print(f"Membaca file dari: {file_path}")
    df = pd.read_csv(file_path, dtype={'VOYAGE': str})

    # Indonesian month abbreviations that differ from the English ones
    # expected by "%b".
    # NOTE(review): "%b" is locale dependent; this assumes an English/C locale.
    ID_TO_EN = {'Des': 'Dec', 'Mei': 'May', 'Agt': 'Aug', 'Agu': 'Aug',
                'Okt': 'Oct', 'Juli': 'Jul', 'Juni': 'Jun'}

    output_columns = ['Job_ID', 'Job_Label', 'Operation_Seq', 'Machine_ID',
                      'Port_Label', 'Travel_Time', 'Arrival_Time',
                      'Proc_Time', 'Due_Date']

    def parse_dt(d, t):
        """Combine a date cell and a time cell into a datetime, or NaT."""
        if pd.isna(d) or pd.isna(t):
            return pd.NaT
        d_str = str(d).strip()
        for id_m, en_m in ID_TO_EN.items():
            d_str = d_str.replace(id_m, en_m)
        try:
            return datetime.strptime(f"{d_str} {str(t).strip()}", "%d-%b-%y %H:%M")
        except ValueError:
            # Only a malformed date/time is treated as missing; anything else
            # (e.g. KeyboardInterrupt) must not be swallowed.
            return pd.NaT

    def parse_columns(date_col, time_col):
        """Parse two text columns into one datetime Series aligned with df."""
        parsed = [parse_dt(d, t) for d, t in zip(df[date_col], df[time_col])]
        # An explicit dtype keeps this working for an empty input file.
        return pd.Series(parsed, index=df.index, dtype='datetime64[ns]')

    def hours(delta):
        """Express a timedelta in hours."""
        return delta.total_seconds() / 3600.0

    df['ETA_FULL'] = parse_columns('ETA_TANGGAL', 'ETA_JAM')
    df['ETD_FULL'] = parse_columns('ETD_TANGGAL', 'ETD_JAM')
    df = df.dropna(subset=['ETA_FULL']).sort_values('ETA_FULL')

    # 2. Mappings
    # Strip port names so that "Ambon " and "Ambon" map to the same machine,
    # consistent with how the unique-port listing cleans this column.
    df['PELABUHAN'] = df['PELABUHAN'].map(
        lambda p: p.strip() if isinstance(p, str) else p
    )
    unique_ports = sorted(df['PELABUHAN'].unique())
    port_map = {p: i+1 for i, p in enumerate(unique_ports)}
    df['Machine_ID'] = df['PELABUHAN'].map(port_map)

    # Job IDs follow the order in which jobs first appear in ETA order.
    df['JOB_KEY'] = df['NAMA_KAPAL'] + "_" + df['VOYAGE']
    unique_jobs = df['JOB_KEY'].unique()
    job_map = {k: i+1 for i, k in enumerate(unique_jobs)}

    # 3. Time reference (t=0)
    global_start = df['ETA_FULL'].min()

    jssp_rows = []

    # One job per voyage
    for job_key, group in df.groupby('JOB_KEY'):
        records = group.sort_values('ETA_FULL').to_dict('records')

        # Pair every stop with the next one. The final stop of the voyage never
        # appears as `row`, so it is not emitted as an operation; it is only
        # used as `next_row` to compute the last travel time.
        for seq, (row, next_row) in enumerate(zip(records, records[1:]), start=1):
            # --- 1. Arrival time (ready time) ---
            arr_rel = hours(row['ETA_FULL'] - global_start)

            # --- 2. Processing time, due date and travel time ---
            if pd.notna(row['ETD_FULL']):
                proc_time = hours(row['ETD_FULL'] - row['ETA_FULL'])
                due_date_rel = hours(row['ETD_FULL'] - global_start)
                # next_row['ETA_FULL'] is never NaT: such rows were dropped.
                # Clamp to zero in case the schedule is inconsistent.
                travel_time = max(0.0, hours(next_row['ETA_FULL'] - row['ETD_FULL']))
            else:
                # Fallback when the ETD is missing in the middle of a voyage.
                proc_time = 0.0
                due_date_rel = arr_rel
                travel_time = 0.0

            jssp_rows.append({
                'Job_ID': job_map[job_key],
                'Job_Label': job_key,
                'Operation_Seq': seq,
                'Machine_ID': row['Machine_ID'],
                'Port_Label': row['PELABUHAN'],
                'Travel_Time': round(travel_time, 2),
                'Arrival_Time': round(arr_rel, 2),
                'Proc_Time': max(0.0, round(proc_time, 2)),
                'Due_Date': round(due_date_rel, 2),
            })

    # Passing the columns explicitly keeps the sort (and the CSV header) valid
    # even when no voyage has two or more stops.
    final_df = pd.DataFrame(jssp_rows, columns=output_columns)
    final_df = final_df.sort_values(by=['Job_ID', 'Operation_Seq'])
    final_df.to_csv(output_path, index=False)

    print(f"Data tersimpan di {output_path}")

# Destination of the transformed table, then run the transformation.
# NOTE(review): absolute, machine-specific path, like DATA_INPUT.
OUTPUT = '/home/re1jie/TA-code/JSSP-CAOA-SSR/Data/transformed_data.csv'
jssp_transform(DATA_INPUT, OUTPUT)

Membaca file dari: /home/re1jie/TA-code/JSSP-CAOA-SSR/Data/raw_data.csv
Data tersimpan di /home/re1jie/TA-code/JSSP-CAOA-SSR/Data/transformed_data.csv
