 Import libraries

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import torch


Load traffic data

In [7]:
#Folder containing traffic CSVs
traffic_folder = Path("../data/raw/traffic")

In [8]:
# Load all monthly traffic data files (March 2025 -> Oct 2025)
traffic_files = sorted(f for f in traffic_folder.glob("traffic_2025-*"))
traffic_df = pd.concat(
    [pd.read_csv(f) for f in traffic_files],
    ignore_index=True
)

print("Combined traffic data shape:", traffic_df.shape)

Combined traffic data shape: (234069, 11)


In [9]:
# Convert timestamps from string to datetime objects
traffic_df['timestamp'] = pd.to_datetime(traffic_df['timestamp'])
traffic_df['timestamp_local'] = traffic_df['timestamp'].dt.tz_convert("Europe/Belgrade")
traffic_df = traffic_df.sort_values('timestamp')

In [10]:
# Verify the count of unique undirected segments
unique_segments = traffic_df[['origin', 'destination']].apply(sorted, axis=1).apply(tuple).unique()
print("Number of unique road segments (undirected):", len(unique_segments))
print(unique_segments[:10]) 


Number of unique road segments (undirected): 8
[('Rrethi i Kazermes', 'Rrethi i Maxi 24')
 ('Semaforat Rruga B', 'Xhamia Llapit')
 ('Semaforat Rruga B', 'Xhamia e Llapit') ('Rrethi Flamurit', 'Te Qafa')
 ('Rrethi i Flamurit', 'Te Qafa')
 ('Rrethi i madh (Lakrishte)', 'Rrethi te Komuna e vjeter')
 ('Rrethi te Ismeti', 'Xhamia Llapit')
 ('Rrethi i Flamurit', 'Salla e sporteve 1 Tetori')]


Data Normalization

In [11]:
# Normalize casing and whitespace
for col in ['origin', 'destination']:
    traffic_df[col] = (traffic_df[col]
                       .astype(str)
                       .str.strip()
                       .str.replace(r'\s+', ' ', regex=True)
                       .str.lower())

In [12]:
normalized_names = {
    "xhamia llapit": "xhamia e llapit",
    "rrethi flamurit": "rrethi i flamurit",
}

# Apply replacements
traffic_df[['origin', 'destination']] = traffic_df[['origin', 'destination']].replace(normalized_names)

In [13]:
# Create columns for year, month, and date
traffic_df['date'] = traffic_df['timestamp'].dt.date
traffic_df['month'] = traffic_df['timestamp'].dt.to_period('M')

# Count dates per month
days_per_month = traffic_df.groupby('month')['date'].nunique().sort_index()
print(days_per_month)

month
2025-03     6
2025-04    30
2025-05    28
2025-06    20
2025-07    31
2025-08    28
2025-09    30
2025-10    31
Freq: M, Name: date, dtype: int64


  traffic_df['month'] = traffic_df['timestamp'].dt.to_period('M')


In [14]:
# Drop March because it only contains 6 days of data
traffic_df = traffic_df[traffic_df['month'] != pd.Period('2025-03', freq='M')]

In [15]:
# Check how many missing values per column
print(traffic_df.isna().sum())

_id                        0
origin                     0
origin_coords              0
destination                0
destination_coords         0
traffic_speed              0
congestion_level           0
distance                   0
polyline              188408
timestamp                  0
__v                        0
timestamp_local            0
date                       0
month                      0
dtype: int64


Drop Columns that add no modeling value

In [16]:
# _id: random MongoDB object ids - useless for ML
# - __v: legacy versioning field (constant 0 across all rows)
# - polyline: largely missing and unused in current model
# month : only useful for completeness analysis

traffic_df = traffic_df.drop(columns=['__v', 'polyline', '_id', 'month'])

In [17]:
traffic_df.dtypes

origin                                         object
origin_coords                                  object
destination                                    object
destination_coords                             object
traffic_speed                                 float64
congestion_level                              float64
distance                                        int64
timestamp                         datetime64[ns, UTC]
timestamp_local       datetime64[ns, Europe/Belgrade]
date                                           object
dtype: object

In [18]:
# Split coordinate strings into separate float columns
traffic_df[['origin_lat', 'origin_lon']] = traffic_df['origin_coords'].str.split(', ', expand=True).astype(float)
traffic_df[['dest_lat', 'dest_lon']] = traffic_df['destination_coords'].str.split(', ', expand=True).astype(float)


In [19]:
# Drop the original string columns
traffic_df = traffic_df.drop(columns=['origin_coords', 'destination_coords'])

Memory Optimization

In [20]:
# Downcast numerics
traffic_df['traffic_speed'] = pd.to_numeric(traffic_df['traffic_speed'], downcast='float')
traffic_df['congestion_level'] = pd.to_numeric(traffic_df['congestion_level'], downcast='float')
traffic_df['distance'] = pd.to_numeric(traffic_df['distance'], downcast='integer')

In [21]:
for col in ['origin', 'destination']:
    traffic_df[col] = traffic_df[col].astype('category')

In [22]:
# Sort by timestamp and reset index for chronological analysis
traffic_df = traffic_df.sort_values('timestamp').reset_index(drop=True)

In [23]:
traffic_df.dtypes

origin                                     category
destination                                category
traffic_speed                               float32
congestion_level                            float32
distance                                      int16
timestamp                       datetime64[ns, UTC]
timestamp_local     datetime64[ns, Europe/Belgrade]
date                                         object
origin_lat                                  float64
origin_lon                                  float64
dest_lat                                    float64
dest_lon                                    float64
dtype: object

In [24]:
traffic_df.shape

(214101, 12)

In [25]:
sampled_df = (traffic_df
              .set_index('timestamp')
              .groupby(['origin', 'destination'], observed=True)  # Fix warning 1
              .resample('20min')  # Fix warning 2
              .agg({
                  'traffic_speed': 'mean',
                  'congestion_level': 'mean',
                  'distance': 'first',  # Static per segment
                  'origin_lat': 'first',
                  'origin_lon': 'first', 
                  'dest_lat': 'first',
                  'dest_lon': 'first'
              })
              .reset_index())

print(f"Sampled data shape: {sampled_df.shape}")
print(f"Unique minutes: {sorted(sampled_df['timestamp'].dt.minute.unique())}")
print(f"Time range: {sampled_df['timestamp'].min()} to {sampled_df['timestamp'].max()}")

Sampled data shape: (184896, 10)
Unique minutes: [np.int32(0), np.int32(20), np.int32(40)]
Time range: 2025-04-01 00:00:00+00:00 to 2025-10-31 23:40:00+00:00


In [26]:
sampled_df['timestamp'] = sampled_df['timestamp'].dt.tz_localize(None)

Load weather data

In [29]:
#Folder containing weather CSVs
weather_folder = Path("../data/raw/weather")

In [30]:
# Load all monthly weather data files (March 2025 -> Oct 2025)
weather_files = sorted(f for f in weather_folder.glob("weather_2025-*"))
weather_df = pd.concat(
    [pd.read_csv(f) for f in weather_files],
    ignore_index=True
)

print("Combined weather data shape:", weather_df.shape)

Combined weather data shape: (5880, 3)


In [31]:
weather_df

Unnamed: 0,time,temperature,precipitation
0,2025-03-01T00:00,5.2,0.0
1,2025-03-01T01:00,5.2,0.0
2,2025-03-01T02:00,5.1,0.0
3,2025-03-01T03:00,5.1,0.0
4,2025-03-01T04:00,5.1,0.0
...,...,...,...
5875,2025-10-31T19:00,9.1,0.0
5876,2025-10-31T20:00,8.2,0.0
5877,2025-10-31T21:00,7.7,0.0
5878,2025-10-31T22:00,7.2,0.0


In [32]:
#Rename time → timestamp and convert to datetime
weather_df = weather_df.rename(columns={'time': 'timestamp'})
weather_df['timestamp'] = pd.to_datetime(weather_df['timestamp'])


In [33]:
weather_df = weather_df[weather_df['timestamp'].dt.month != 3]

In [34]:
#Set timestamp as index for resampling
weather_df = weather_df.set_index('timestamp').sort_index()


In [35]:
# Resample to 20-minute intervals + forward fill
weather_resampled = weather_df.resample('20min').ffill()

In [36]:
weather_resampled.shape

(15406, 2)

Combine datasets

In [37]:
traffic_weather_df = sampled_df.merge(weather_resampled, on='timestamp', how='inner')

In [38]:
#Overview of the merged datasets
traffic_weather_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 184872 entries, 0 to 184871
Data columns (total 12 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   origin            184872 non-null  category      
 1   destination       184872 non-null  category      
 2   timestamp         184872 non-null  datetime64[ns]
 3   traffic_speed     166509 non-null  float32       
 4   congestion_level  166509 non-null  float32       
 5   distance          166509 non-null  float64       
 6   origin_lat        166509 non-null  float64       
 7   origin_lon        166509 non-null  float64       
 8   dest_lat          166509 non-null  float64       
 9   dest_lon          166509 non-null  float64       
 10  temperature       184872 non-null  float64       
 11  precipitation     184872 non-null  float64       
dtypes: category(2), datetime64[ns](1), float32(2), float64(7)
memory usage: 13.0 MB


Handle Missing Data 

In [40]:
# Sort for consistency
df = traffic_weather_df.sort_values(['origin','destination','timestamp']).copy()


In [41]:
# Interpolate short gaps in dynamic features
dynamic_cols = ['traffic_speed', 'congestion_level']
df[dynamic_cols] = (
    df.groupby(['origin','destination'], observed=True)[dynamic_cols]
      .apply(lambda g: g.interpolate(limit=3).ffill().bfill())
      .reset_index(level=[0,1], drop=True)
)

In [42]:
# Fill static per-edge fields
static_cols = ['distance','origin_lat','origin_lon','dest_lat','dest_lon']
df[static_cols] = (
    df.groupby(['origin','destination'], observed=True)[static_cols]
      .transform(lambda g: g.ffill().bfill())
)

In [43]:
# Confirm no remaining missing values
print(df.isna().sum())

origin              0
destination         0
timestamp           0
traffic_speed       0
congestion_level    0
distance            0
origin_lat          0
origin_lon          0
dest_lat            0
dest_lon            0
temperature         0
precipitation       0
dtype: int64


In [44]:
df[['origin','destination']].drop_duplicates().shape


(12, 2)

Add Temporal Features

In [45]:
df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True)
df = df.sort_values('timestamp').set_index('timestamp')

# Convert to local timezone
df_local = df.tz_convert("Europe/Belgrade")

In [46]:
# Extract base temporal columns
df['hour'] = df_local.index.hour
df['dayofweek'] = df_local.index.dayofweek  # Monday=0
df['month'] = df_local.index.month
df['is_weekend'] = (df_local.index.dayofweek >= 5).astype('int8')

In [47]:
# Apply cyclical encodings

# Hour of day
df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)

# Day of week
df['dow_sin'] = np.sin(2 * np.pi * df['dayofweek'] / 7)
df['dow_cos'] = np.cos(2 * np.pi * df['dayofweek'] / 7)

# Month 
df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)

In [48]:
df

Unnamed: 0_level_0,origin,destination,traffic_speed,congestion_level,distance,origin_lat,origin_lon,dest_lat,dest_lon,temperature,...,hour,dayofweek,month,is_weekend,hour_sin,hour_cos,dow_sin,dow_cos,month_sin,month_cos
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-04-01 00:00:00+00:00,rrethi i flamurit,salla e sporteve 1 tetori,31.477501,-4.927500,1376.0,42.646832,21.156087,42.653815,21.167529,6.5,...,2,1,4,0,0.5,0.866025,0.781831,0.623490,0.866025,-0.500000
2025-04-01 00:00:00+00:00,rrethi i madh (lakrishte),rrethi te komuna e vjeter,33.197502,-10.655000,2420.0,42.653122,21.146059,42.666980,21.163980,6.5,...,2,1,4,0,0.5,0.866025,0.781831,0.623490,0.866025,-0.500000
2025-04-01 00:00:00+00:00,rrethi i maxi 24,rrethi i kazermes,27.445000,8.515000,3235.0,42.652650,21.175016,42.650434,21.138832,6.5,...,2,1,4,0,0.5,0.866025,0.781831,0.623490,0.866025,-0.500000
2025-04-01 00:00:00+00:00,te qafa,rrethi i flamurit,22.952499,23.482500,2576.0,42.665365,21.160591,42.648040,21.157600,6.5,...,2,1,4,0,0.5,0.866025,0.781831,0.623490,0.866025,-0.500000
2025-04-01 00:00:00+00:00,rrethi te ismeti,xhamia e llapit,29.514999,1.610000,3154.0,42.658080,21.137243,42.671410,21.161750,6.5,...,2,1,4,0,0.5,0.866025,0.781831,0.623490,0.866025,-0.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-10-31 23:00:00+00:00,rrethi i kazermes,rrethi i maxi 24,22.709999,24.309999,3242.0,42.650316,21.138918,42.652456,21.175115,6.8,...,0,5,11,1,0.0,1.000000,-0.974928,-0.222521,-0.500000,0.866025
2025-10-31 23:00:00+00:00,rrethi i flamurit,te qafa,11.330000,62.240002,2621.0,42.648019,21.157728,42.665520,21.160670,6.8,...,0,5,11,1,0.0,1.000000,-0.974928,-0.222521,-0.500000,0.866025
2025-10-31 23:00:00+00:00,rrethi i flamurit,salla e sporteve 1 tetori,23.700001,21.000000,1376.0,42.646832,21.156087,42.653815,21.167529,6.8,...,0,5,11,1,0.0,1.000000,-0.974928,-0.222521,-0.500000,0.866025
2025-10-31 23:00:00+00:00,xhamia e llapit,rrethi te ismeti,27.469999,8.430000,3037.0,42.671426,21.161827,42.658100,21.137130,6.8,...,0,5,11,1,0.0,1.000000,-0.974928,-0.222521,-0.500000,0.866025


In [50]:
# Save the cleaned dataset
output_path = "../data/processed/traffic_weather_clean.csv"
df.to_csv(output_path, index=False)  # index=True to keep timestamp as a column
print(f"Cleaned dataset saved to: {output_path}")


Cleaned dataset saved to: ../data/processed/traffic_weather_clean.csv


In [51]:
# Save the cleaned dataset as Parquet (for storage efficiency)
output_path = "../data/processed/traffic_weather_clean.parquet"
df.to_parquet(output_path, index=True, engine='pyarrow')
print(f"Cleaned dataset saved to: {output_path}")

Cleaned dataset saved to: ../data/processed/traffic_weather_clean.parquet


### Graph Structure Definition

In [53]:
# Load clean data
df = pd.read_parquet("../data/processed/traffic_weather_clean.parquet")

In [54]:
# Convert origin/destination to category - more efficient for GNN
df['origin'] = df['origin'].astype('category')
df['destination'] = df['destination'].astype('category')

# Create categorical codes for modeling
df['origin_code'] = df['origin'].cat.codes
df['destination_code'] = df['destination'].cat.codes

In [55]:
# human-readable segment key: "Origin → Destination"
df['segment_key'] = df['origin'].str.cat(df['destination'], sep="→")

In [56]:
# Unique directed segments
segments = sorted(df['segment_key'].unique())
print(f"Number of directed segments (nodes): {len(segments)}")
for i, seg in enumerate(segments):
    print(f"Node {i}: {seg}")


Number of directed segments (nodes): 12
Node 0: rrethi i flamurit→salla e sporteve 1 tetori
Node 1: rrethi i flamurit→te qafa
Node 2: rrethi i kazermes→rrethi i maxi 24
Node 3: rrethi i madh (lakrishte)→rrethi te komuna e vjeter
Node 4: rrethi i maxi 24→rrethi i kazermes
Node 5: rrethi te ismeti→xhamia e llapit
Node 6: rrethi te komuna e vjeter→rrethi i madh (lakrishte)
Node 7: salla e sporteve 1 tetori→rrethi i flamurit
Node 8: semaforat rruga b→xhamia e llapit
Node 9: te qafa→rrethi i flamurit
Node 10: xhamia e llapit→rrethi te ismeti
Node 11: xhamia e llapit→semaforat rruga b


In [57]:
# Map segment_key ↔ integer index
segment_to_id = {seg: i for i, seg in enumerate(segments)}
id_to_segment = {i: seg for seg, i in segment_to_id.items()}


In [58]:
# Time series lives on road segments, each segment must get a node index
# Add numeric segment index to the dataframe
df['segment_idx'] = df['segment_key'].map(segment_to_id).astype('int64')
# Reset index to make timestamp a column, then view
df.reset_index()[['timestamp', 'origin', 'destination', 'segment_key', 'segment_idx']].head()

Unnamed: 0,timestamp,origin,destination,segment_key,segment_idx
0,2025-04-01 00:00:00+00:00,rrethi i flamurit,salla e sporteve 1 tetori,rrethi i flamurit→salla e sporteve 1 tetori,0
1,2025-04-01 00:00:00+00:00,rrethi i madh (lakrishte),rrethi te komuna e vjeter,rrethi i madh (lakrishte)→rrethi te komuna e v...,3
2,2025-04-01 00:00:00+00:00,rrethi i maxi 24,rrethi i kazermes,rrethi i maxi 24→rrethi i kazermes,4
3,2025-04-01 00:00:00+00:00,te qafa,rrethi i flamurit,te qafa→rrethi i flamurit,9
4,2025-04-01 00:00:00+00:00,rrethi te ismeti,xhamia e llapit,rrethi te ismeti→xhamia e llapit,5


In [59]:
# One row per segment with its origin + destination
segments_df = (
    df[['segment_idx', 'origin', 'destination', 'distance',
        'origin_lat', 'origin_lon', 'dest_lat', 'dest_lon']]
    .drop_duplicates(subset=['segment_idx'])
    .set_index('segment_idx')
    .sort_index()
)

print(f"Segments table shape: {segments_df.shape}")
segments_df.head(len(segments_df))  # small graph, can inspect all

Segments table shape: (12, 7)


Unnamed: 0_level_0,origin,destination,distance,origin_lat,origin_lon,dest_lat,dest_lon
segment_idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,rrethi i flamurit,salla e sporteve 1 tetori,1376.0,42.646832,21.156087,42.653815,21.167529
1,rrethi i flamurit,te qafa,2621.0,42.648019,21.157728,42.66552,21.16067
2,rrethi i kazermes,rrethi i maxi 24,3242.0,42.650316,21.138918,42.652456,21.175115
3,rrethi i madh (lakrishte),rrethi te komuna e vjeter,2420.0,42.653122,21.146059,42.66698,21.16398
4,rrethi i maxi 24,rrethi i kazermes,3235.0,42.65265,21.175016,42.650434,21.138832
5,rrethi te ismeti,xhamia e llapit,3154.0,42.65808,21.137243,42.67141,21.16175
6,rrethi te komuna e vjeter,rrethi i madh (lakrishte),2461.0,42.666861,21.163956,42.65314,21.14596
7,salla e sporteve 1 tetori,rrethi i flamurit,1391.0,42.653842,21.167468,42.64693,21.15603
8,semaforat rruga b,xhamia e llapit,3169.0,42.649166,21.172731,42.671331,21.161789
9,te qafa,rrethi i flamurit,2576.0,42.665365,21.160591,42.64804,21.1576


In [60]:
# Build adjacency between segment-nodes
edges_src = []
edges_dst = []

for i, row_i in segments_df.iterrows():
    for j, row_j in segments_df.iterrows():
        # You can drive from segment i into segment j
        if row_i['destination'] == row_j['origin']:
            edges_src.append(i)
            edges_dst.append(j)

num_nodes = len(segments_df)
print(f"Num nodes : {num_nodes}")
print(f"Num directed edges : {len(edges_src)}")


Num nodes : 12
Num directed edges : 16


In [61]:
#Add self loops so each node sees itself
add_self_loops = True

if add_self_loops:
    for k in range(num_nodes):
        edges_src.append(k)
        edges_dst.append(k)

print(f"Num directed edges after self-loops: {len(edges_src)}")

Num directed edges after self-loops: 28


In [62]:
edge_index = torch.tensor([edges_src, edges_dst], dtype=torch.long)

print("edge_index shape:", edge_index.shape)   # [2, E]
print("First 10 edges (src -> dst):")
for k in range(min(10, edge_index.shape[1])):
    s = edge_index[0, k].item()
    t = edge_index[1, k].item()
    print(f"  {s} ({id_to_segment[s]}) -> {t} ({id_to_segment[t]})")

    
#unweighted
edge_weight = torch.ones(edge_index.shape[1], dtype=torch.float)

edge_index shape: torch.Size([2, 28])
First 10 edges (src -> dst):
  0 (rrethi i flamurit→salla e sporteve 1 tetori) -> 7 (salla e sporteve 1 tetori→rrethi i flamurit)
  1 (rrethi i flamurit→te qafa) -> 9 (te qafa→rrethi i flamurit)
  2 (rrethi i kazermes→rrethi i maxi 24) -> 4 (rrethi i maxi 24→rrethi i kazermes)
  3 (rrethi i madh (lakrishte)→rrethi te komuna e vjeter) -> 6 (rrethi te komuna e vjeter→rrethi i madh (lakrishte))
  4 (rrethi i maxi 24→rrethi i kazermes) -> 2 (rrethi i kazermes→rrethi i maxi 24)
  5 (rrethi te ismeti→xhamia e llapit) -> 10 (xhamia e llapit→rrethi te ismeti)
  5 (rrethi te ismeti→xhamia e llapit) -> 11 (xhamia e llapit→semaforat rruga b)
  6 (rrethi te komuna e vjeter→rrethi i madh (lakrishte)) -> 3 (rrethi i madh (lakrishte)→rrethi te komuna e vjeter)
  7 (salla e sporteve 1 tetori→rrethi i flamurit) -> 0 (rrethi i flamurit→salla e sporteve 1 tetori)
  7 (salla e sporteve 1 tetori→rrethi i flamurit) -> 1 (rrethi i flamurit→te qafa)


In [63]:
# Adding node-level static features
# distance (normalized)
distances = segments_df['distance'].values  # length = num_nodes
dist_mean = distances.mean()
dist_std = distances.std() if distances.std() > 0 else 1.0
distances_norm = (distances - dist_mean) / dist_std

node_static_features = torch.from_numpy(
    distances_norm.reshape(-1, 1)  # shape [num_nodes, 1]
).float()

print(node_static_features.shape)


torch.Size([12, 1])


In [64]:
#Save graph structure for later
graph_data = {
    'edge_index': edge_index,
    'edge_weight': edge_weight,
    'num_nodes': num_nodes,
    'segment_to_id': segment_to_id,
    'id_to_segment': id_to_segment,
    'node_static_features': node_static_features if 'node_static_features' in locals() else None,
}

out_path = Path("data") / "processed" / "segment_graph.pt"
out_path.parent.mkdir(parents=True, exist_ok=True)

torch.save(graph_data, out_path)
print(f"Saved segment graph to: {out_path}")


Saved segment graph to: data/processed/segment_graph.pt


In [65]:
#Saving the cleaned + segment-indexed dataframe
traffic_weather_with_segments = df.copy()

traffic_weather_with_segments = df.reset_index()

output_path = "../data/processed/traffic_weather_with_segments.parquet"
traffic_weather_with_segments.to_parquet(output_path, index=False)
print(f"Saved cleaned data with segment_idx to: {output_path}")

Saved cleaned data with segment_idx to: ../data/processed/traffic_weather_with_segments.parquet
