In [1]:
import os
import pandas as pd
import numpy as np

# Project root. Defaults to the original workstation path, but can be redirected
# by setting the MARITIME_BASE_PATH environment variable so the notebook can run
# on another machine without editing this cell. An unset or empty variable falls
# back to the default.
BASE_PATH = (
    os.environ.get("MARITIME_BASE_PATH")
    or "E:/C-DAC/Major Project/AI-Based-Maritime-Port-Intelligence"
)

# Data layer directories under the project root. The silver layer is read below;
# the gold layer is presumably where this notebook's outputs go (no write is
# visible in this part of the notebook).
SILVER_PATH = os.path.join(BASE_PATH, "data", "silver")
GOLD_PATH = os.path.join(BASE_PATH, "data", "gold")

# exist_ok=True makes this safe to re-run when the directory is already there.
os.makedirs(GOLD_PATH, exist_ok=True)

# Load the port events table from the silver layer.
port_events = pd.read_csv(os.path.join(SILVER_PATH, "port_events.csv"))


In [2]:
# Timestamp columns to parse, if present in the loaded table.
time_cols = ["etaSchedule", "eta", "ata", "etdSchedule", "etd", "atd"]

for col in time_cols:
    # Columns missing from the frame are skipped rather than raising.
    if col not in port_events.columns:
        continue
    # errors="coerce" turns unparseable values into NaT instead of failing.
    port_events[col] = pd.to_datetime(port_events[col], errors="coerce")


## Time-Based Features

In [3]:
# Calendar features derived from the "ata" timestamp (presumably the actual
# time of arrival -- confirm against the data dictionary).
# Rows where "ata" is NaT get NaN for both the hour and the day of week.
arrival_time = port_events["ata"]
port_events["arrival_hour"] = arrival_time.dt.hour
port_events["arrival_dayofweek"] = arrival_time.dt.dayofweek

# pandas numbers days Monday=0 ... Sunday=6, so values >= 5 are Saturday/Sunday.
# A vectorised comparison replaces the per-row lambda. NaN compares False, so
# rows without an arrival time are still flagged 0.
port_events["is_weekend"] = (port_events["arrival_dayofweek"] >= 5).astype("int64")


## Delay-Based Features

In [4]:
# Replace missing values in the delay / stay-duration columns with 0.
# Note that a missing port stay therefore becomes a 0-hour stay.
for hours_col in ("arrival_delay_hrs", "departure_delay_hrs", "port_stay_hrs"):
    port_events[hours_col] = port_events[hours_col].fillna(0)

## Congestion Severity Levels

In [6]:
def congestion_level(hours, low_max=12, medium_max=24):
    """Classify a port stay duration into a congestion severity bucket.

    Args:
        hours: Length of the port stay, in hours.
        low_max: Inclusive upper bound of the "Low" bucket (default 12).
        medium_max: Inclusive upper bound of the "Medium" bucket (default 24).

    Returns:
        "Low" when ``hours <= low_max``, "Medium" when
        ``low_max < hours <= medium_max``, otherwise "High".

    Note:
        NaN compares False against both bounds, so a NaN input falls through
        to "High". Fill or drop missing durations before calling if that is
        not the intended label.
    """
    if hours <= low_max:
        return "Low"
    if hours <= medium_max:
        return "Medium"
    return "High"

# Label each event with a congestion severity derived from its port stay length.
stay_hours = port_events["port_stay_hrs"]
port_events["congestion_level"] = stay_hours.apply(congestion_level)


In [7]:
from sklearn.preprocessing import OrdinalEncoder

# Explicit category order so the encoded values follow severity:
# "Low" -> 0, "Medium" -> 1, "High" -> 2.
severity_order = ["Low", "Medium", "High"]
encoder = OrdinalEncoder(categories=[severity_order])

# The encoder expects a 2-D input, hence the double brackets selecting a
# single-column frame; the encoded values are then cast to integers.
encoded_levels = encoder.fit_transform(port_events[["congestion_level"]])
port_events["congestion_label"] = encoded_levels.astype(int)


## Encode Categorical Variables

### Ports (Arrival & Departure)

In [9]:
# One-hot encode both port columns. drop_first=True omits the first category of
# each column. The source columns are replaced by the dummy columns, so this
# cell cannot be re-run without reloading port_events.
port_events = pd.get_dummies(
    port_events,
    columns=["arrival_port", "departure_port"],
    drop_first=True,
)

## Final Feature Selection

In [10]:
# Engineered columns carried into the final dataset.
# NOTE(review): "congestion_label" is derived from "port_stay_hrs"; if it is the
# prediction target, confirm it is separated from the inputs before training.
FEATURE_COLS = [
    "arrival_hour", "arrival_dayofweek", "is_weekend",
    "arrival_delay_hrs", "departure_delay_hrs", "port_stay_hrs",
    "congestion_label",
]

# Every column whose name carries one of the port dummy prefixes.
PORT_COLS = [
    name
    for name in port_events.columns
    if name.startswith(("arrival_port_", "departure_port_"))
]

# Final table: engineered columns first, then the port indicator columns.
final_features = port_events[[*FEATURE_COLS, *PORT_COLS]]
