In [None]:
# Standard library imports
import sys
import time
import datetime
import itertools
import importlib

# Third-party imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.mixture import BayesianGaussianMixture
from shapely.geometry import shape
import nimfa
import scipy
from scipy import stats
from pathlib import Path

# Apply seaborn's "whitegrid" style globally so the figures drawn afterwards share one look.
sns.set(style="whitegrid")

<module 'utils' from '/content/utils.py'>

# Define Folder Path & Sensor IDs

In [None]:
from pathlib import Path

# Folder holding one "<sensor>_by_length_hour.csv" file per sensor.
FolderPath = Path(".../Data/NorSwe/")  # Change to your path

# Sensor IDs, in the order they are loaded and processed.
Sensors = """
01777V885181 77275V885276 35829V885266 99923V578123
50089V578151 84237V578097 76778V704564 69140V704643
57929V705247 52209V971422 00737V704646 94864V704707
94299V704696 05732V971567 21405V2607269 09269V971425
02535V971411 04904V971774 35229V971507
""".split()

# Fitting window [first day, last day] shared by most sensors ...
DEFAULT_FIT_WINDOW = (datetime.date(2017, 1, 1), datetime.date(2020, 3, 12))

# ... and the sensors whose window deviates from that default.
FIT_WINDOW_OVERRIDES = {
    "35829V885266": (datetime.date(2018, 1, 15), datetime.date(2020, 3, 12)),
    "50089V578151": (datetime.date(2019, 1, 1), datetime.date(2019, 12, 31)),
    "57929V705247": (datetime.date(2017, 1, 1), datetime.date(2018, 5, 30)),
    "52209V971422": (datetime.date(2018, 1, 1), datetime.date(2020, 3, 12)),
    "05732V971567": (datetime.date(2017, 10, 1), datetime.date(2020, 3, 12)),
    "21405V2607269": (datetime.date(2019, 3, 1), datetime.date(2020, 3, 12)),
    "09269V971425": (datetime.date(2018, 3, 1), datetime.date(2020, 3, 12)),
    "02535V971411": (datetime.date(2019, 1, 1), datetime.date(2020, 3, 12)),
}

# sensor id -> [start date, end date]; every sensor gets its own list object.
fit_dates = {
    sensor: list(FIT_WINDOW_OVERRIDES.get(sensor, DEFAULT_FIT_WINDOW))
    for sensor in Sensors
}



# Read Sensor CSV Files

In [None]:
# Load one CSV per sensor. `all_data` keeps the raw frames (in `Sensors` order);
# `sensor_dict` records each sensor's covered date span and raw direction labels.
sensor_dict = {}
all_data = []
for s in Sensors:
    # A pathlib.Path cannot be concatenated to a str with `+` (TypeError);
    # join with `/` instead. Wrapping in Path() also accepts a plain-string FolderPath.
    csv_path = Path(FolderPath) / f"{s}_by_length_hour.csv"
    df = pd.read_csv(csv_path, sep=',',
                     usecols=['sensor_id', 'from_date', 'to_date', 'from_hour', 'to_hour',
                              'sensor_dir', 'short_vehicles', 'long_vehicles', 'unknown_length'],
                     parse_dates=['from_date', 'to_date'])
    sensor_dict[s] = {
        'Start': df['from_date'].min(),
        'End': df['to_date'].max(),
        'Directions': df['sensor_dir'].unique()
    }
    all_data.append(df)

# Standardize Direction Labels

In [None]:
# Raw `sensor_dir` labels that are mapped to 'NOR'; every other label becomes 'SWE'.
to_norway = [
    'Trældal x Ev6', 'Hestbrinken', 'Mo i Rana', 'Trofors', 'Hattfjelldalen',
    'Gjersvika', 'Sandvika', 'Nordli', 'Verdalsøra', 'Meråker', 'Drevsjø',
    'ØSTBY', 'X/RV 25', 'NYBERGSUND', 'Holtet', 'Røgden', 'ØYERMOEN XF202',
    'KONGSVINGER', 'BEKKENGA', 'Oslo', 'Halden', 'HALDEN', 'OSLO'
]

# The frames are relabelled in place, so running this cell a second time sees
# 'NOR'/'SWE' instead of the raw labels. Accepting 'NOR' as well keeps the cell
# idempotent; otherwise a re-run would turn every row into 'SWE'.
norway_labels = set(to_norway) | {'NOR'}

for df in all_data:
    df['sensor_dir'] = np.where(df['sensor_dir'].isin(norway_labels), 'NOR', 'SWE')



In [None]:
def agg_APIdata_NORSWE(data, WhichVehicles,
                       min_date=datetime.datetime(2017, 1, 1),
                       max_date=datetime.datetime(2023, 5, 22)):
    """Turn one sensor's hourly count table into per-direction hourly series.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns 'sensor_id', 'from_date', 'to_date', 'from_hour',
        'to_hour', 'sensor_dir', 'short_vehicles', 'long_vehicles' and
        'unknown_length'. 'sensor_dir' is compared against 'NOR' (anything else
        is treated as heading to Sweden), and the first two characters of
        'from_hour' are parsed as the hour of day. The frame is not modified.
    WhichVehicles : str
        'Total'  - short + long + unknown-length vehicles,
        'Small'  - the 'short_vehicles' column,
        'Heavy'  - the 'long_vehicles' column,
        'Both'   - keep the three count columns separate (long format).
        Any other value leaves the columns untouched, so the later
        'total_vehicles' selection raises KeyError unless that column exists.
    min_date, max_date : datetime.datetime
        Only rows strictly between the two timestamps are kept. For the
        single-count modes the result is reindexed to the hourly range
        min_date..max_date (both ends included), missing hours filled with 0.

    Returns
    -------
    pandas.DataFrame
        'Both': columns sensor_origin, sensor_destination, date,
        short_vehicles, long_vehicles, unknown_length.
        Otherwise: a wide table indexed by (sensor_origin, sensor_destination)
        with one column per hourly timestamp.
    """
    # Pick the count column. Every branch returns a new frame, so the caller's
    # DataFrame is never altered ('Total' used to add a column to it in place).
    if WhichVehicles == 'Total':
        data = data.assign(
            total_vehicles=data['short_vehicles'] + data['long_vehicles'] + data['unknown_length'])
    elif WhichVehicles == 'Small':
        data = data.rename(columns={'short_vehicles': 'total_vehicles'})
    elif WhichVehicles == 'Heavy':
        data = data.rename(columns={'long_vehicles': 'total_vehicles'})

    # The sensor direction is the destination; the origin is the other country.
    data = data.rename(columns={'sensor_dir': 'dest_country'})
    data['origin_country'] = np.where(data['dest_country'] == 'NOR', 'SWE', 'NOR')

    data['sensor_id'] = data['sensor_id'].astype(str)

    # Keys of the form "<sensor_id>, <country>".
    data['sensor_origin'] = data[['sensor_id', 'origin_country']].agg(', '.join, axis=1)
    data['sensor_destination'] = data[['sensor_id', 'dest_country']].agg(', '.join, axis=1)

    # Add temporal information: one timestamp per row, built from date + start hour.
    data = data.drop(columns=['to_date']).rename(columns={'from_date': 'date'})

    # Discard rows whose interval starts and ends on the same hour.
    data = data[data['from_hour'] != data['to_hour']].rename(columns={'from_hour': 'hour'})

    hour_offset = pd.to_timedelta(data['hour'].str[:2].astype(int), unit='h')
    data['date'] = data['date'] + hour_offset

    # Strict inequalities: rows exactly at min_date or max_date are excluded.
    data = data[(data['date'] > min_date) & (data['date'] < max_date)].copy()

    if WhichVehicles == 'Both':
        # The count column is called 'short_vehicles'; selecting 'small_vehicles'
        # (as before) always raised KeyError.
        return data[['sensor_origin', 'sensor_destination', 'date',
                     'short_vehicles', 'long_vehicles', 'unknown_length']].copy()

    # Single-count modes: drop missing counts, then cast to integers.
    data = (data[['sensor_origin', 'sensor_destination', 'date', 'total_vehicles']]
            .dropna(subset=['total_vehicles'])
            .copy())
    data['total_vehicles'] = data['total_vehicles'].astype(int)

    # Complete hourly grid; hours without a record are filled with 0.
    hourly_index = pd.date_range(min_date, max_date, name='date', freq='1h')

    data = (data.set_index('date')
                .groupby(['sensor_origin', 'sensor_destination'])['total_vehicles']
                .apply(lambda counts: counts.reindex(hourly_index, fill_value=0))
                .reset_index())

    # Wide layout: one row per (origin, destination), one column per hour.
    data = (data.pivot_table(index=['sensor_origin', 'sensor_destination'],
                             columns=['date'],
                             values=['total_vehicles'])
                .droplevel(level=0, axis=1))

    return data


# Aggregate Data

In [None]:
# One aggregated frame per sensor, in the same order as `all_data`.
# 'Small' selects the short-vehicle counts.
agg_data = [
    agg_APIdata_NORSWE(
        sensor_frame,
        'Small',
        min_date=datetime.datetime(2017, 1, 1),
        max_date=datetime.datetime(2023, 12, 31),
    )
    for sensor_frame in all_data
]


# Fit Bayesian GMMs

In [None]:
# NOTE(review): `utils` is not imported in the visible cells - make sure
# `import utils` has run before this cell.
fitted_models = []
fitted_data = []
for sensor_frame in agg_data:
    # The first index level reads "<sensor_id>, <country>"; keep the id part.
    first_origin = sensor_frame.index[0][0]
    sensor_id = first_origin.split(',')[0]

    # Per-sensor fitting window from the configuration cell.
    fit_start, fit_end = fit_dates[sensor_id]

    fitted_model, fitted_sample = utils.fit_period(
        sensor_frame,
        d1=fit_start,
        d2=fit_end,
        hourly=True,
        nSamp=10000,
        Normalize=False,
        N=10,
        seed=2024,
        FitMethod='Bayesian',
    )
    fitted_models.append(fitted_model)
    fitted_data.append(fitted_sample)

# Combine the per-sensor results: models side by side, data and aggregates stacked.
models = pd.concat(fitted_models, axis=1)
data = pd.concat(fitted_data, axis=0)
agg_data = pd.concat(agg_data, axis=0)



  models = data[c].reset_index("weekday").groupby("weekday").agg(lambda x : fit_model(x,hourly_data, nSamp, N = N, hourminute = True,seed = seed, FitMethod = FitMethod))
  models = data[c].reset_index("weekday").groupby("weekday").agg(lambda x : fit_model(x,hourly_data, nSamp, N = N, hourminute = True,seed = seed, FitMethod = FitMethod))
  models = data[c].reset_index("weekday").groupby("weekday").agg(lambda x : fit_model(x,hourly_data, nSamp, N = N, hourminute = True,seed = seed, FitMethod = FitMethod))
  models = data[c].reset_index("weekday").groupby("weekday").agg(lambda x : fit_model(x,hourly_data, nSamp, N = N, hourminute = True,seed = seed, FitMethod = FitMethod))
  models = data[c].reset_index("weekday").groupby("weekday").agg(lambda x : fit_model(x,hourly_data, nSamp, N = N, hourminute = True,seed = seed, FitMethod = FitMethod))
  models = data[c].reset_index("weekday").groupby("weekday").agg(lambda x : fit_model(x,hourly_data, nSamp, N = N, hourminute = True,seed = seed, FitM

# Save the fitted models and processed data to disk for later use


In [None]:
# Output folder for the pickles (placeholder path - point it at your own data folder).
BASE_DIR = Path(".../Data/NorSwe_GMM")
MODELS_FILE = BASE_DIR.joinpath("models_nor.pkl")
DATA_FILE = BASE_DIR.joinpath("data_nor.pkl")
AGG_DATA_FILE = BASE_DIR.joinpath("agg_data_nor.pkl")

# Write each result to its own pickle file.
for frame, target in (
    (models, MODELS_FILE),
    (data, DATA_FILE),
    (agg_data, AGG_DATA_FILE),
):
    frame.to_pickle(target)
