In [None]:
# Standard library imports
import sys
import time
import datetime
import itertools
import importlib

# Third-party imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.mixture import BayesianGaussianMixture
from shapely.geometry import shape
import nimfa
import scipy
from scipy import stats

# Add project folder to path if needed
# sys.path.append('path/to/your/project/data')  # Uncomment and set if you have custom modules

# Set the seaborn plotting style to "whitegrid" for the figures in this notebook.
# This is a cosmetic setting only; it does not touch any data.
# NOTE(review): seaborn documents `sns.set` as an alias of `sns.set_theme` —
# consider switching once the minimum seaborn version in use is confirmed.
sns.set(style="whitegrid")

Collecting nimfa
  Downloading nimfa-1.4.0-py2.py3-none-any.whl.metadata (3.8 kB)
Downloading nimfa-1.4.0-py2.py3-none-any.whl (4.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nimfa
Successfully installed nimfa-1.4.0
Mounted at /content/drive


# NorSwe data visualisation






In [None]:
import pandas as pd
from pathlib import Path

# Folder holding the per-sensor hourly CSV files (relative path).
folder_path = Path("../data/NorSwe")  # Adjust the path as needed

# IDs of the sensors to load; one "<sensor_id>_by_length_hour.csv" file is
# expected per ID inside folder_path.
sensor_ids = [
    "01777V885181", "77275V885276", "35829V885266", "99923V578123",
    "50089V578151", "84237V578097", "76778V704564", "69140V704643",
    "57929V705247", "52209V971422", "00737V704646", "94864V704707",
    "94299V704696", "05732V971567", "21405V2607269", "09269V971425",
    "02535V971411", "04904V971774", "35229V971507"
]

# One DataFrame per sensor, in the same order as sensor_ids.
all_data = []

for sensor_id in sensor_ids:
    # Join with the Path "/" operator. Formatting folder_path directly into an
    # f-string drops the directory separator and yields
    # "../data/NorSwe<sensor_id>_by_length_hour.csv", which does not exist.
    file_path = folder_path / f"{sensor_id}_by_length_hour.csv"
    df = pd.read_csv(
        file_path,
        sep=',',
        usecols=[
            'sensor_id', 'from_date', 'to_date', 'from_hour', 'to_hour',
            'sensor_dir', 'short_vehicles', 'long_vehicles', 'unknown_length'
        ],
        parse_dates=['from_date', 'to_date']
    )
    # Overwrite the column with the ID used to locate the file so every row
    # carries the same sensor_id string as the sensor_ids list.
    df['sensor_id'] = sensor_id
    all_data.append(df)

# Stack all sensors into a single frame with a fresh 0..n-1 index.
df_all = pd.concat(all_data, ignore_index=True)

# Quick sanity check: first rows and overall (rows, columns) shape.
print(df_all.head())
print(df_all.shape)


      sensor_id  from_date    to_date from_hour   to_hour     sensor_dir  \
0  01777V885181 2017-01-01 2017-01-01  01:00:00  02:00:00  Trældal x Ev6   
1  01777V885181 2017-01-01 2017-01-01  01:00:00  02:00:00    Riksgrensen   
2  01777V885181 2017-01-01 2017-01-01  02:00:00  03:00:00  Trældal x Ev6   
3  01777V885181 2017-01-01 2017-01-01  02:00:00  03:00:00    Riksgrensen   
4  01777V885181 2017-01-01 2017-01-01  03:00:00  04:00:00  Trældal x Ev6   

   short_vehicles  long_vehicles  unknown_length  
0             1.0            0.0             0.0  
1             3.0            0.0             0.0  
2             1.0            1.0             1.0  
3             0.0            1.0             1.0  
4             0.0            0.0             0.0  
(2185662, 9)


In [None]:
# Raw `sensor_dir` values that are mapped to the 'NOR' label; every other
# value is mapped to 'SWE'.
# NOTE(review): presumably these are the direction names pointing towards
# Norway — confirm against the sensor metadata.
to_norway = [
    'Trældal x Ev6', 'Hestbrinken', 'Mo i Rana', 'Trofors', 'Hattfjelldalen',
    'Gjersvika', 'Sandvika', 'Nordli', 'Verdalsøra', 'Meråker', 'Drevsjø',
    'ØSTBY', 'X/RV 25', 'NYBERGSUND', 'Holtet', 'Røgden', 'ØYERMOEN XF202',
    'KONGSVINGER', 'BEKKENGA', 'Oslo', 'Halden', 'HALDEN', 'OSLO'
]

# The frames in all_data are relabelled in place, so this cell may see values
# that were already converted by a previous run. Accepting 'NOR' itself as a
# NOR label keeps the cell idempotent: without it, a second run would turn
# every row into 'SWE' because 'NOR' is not one of the raw direction names.
nor_labels = to_norway + ['NOR']

for df in all_data:
    df['sensor_dir'] = np.where(df['sensor_dir'].isin(nor_labels), 'NOR', 'SWE')

# Rebuild the combined frame so it reflects the relabelled directions.
df_all = pd.concat(all_data, ignore_index=True)
df_all.head()

Unnamed: 0,sensor_id,from_date,to_date,from_hour,to_hour,sensor_dir,short_vehicles,long_vehicles,unknown_length
0,01777V885181,2017-01-01,2017-01-01,01:00:00,02:00:00,NOR,1.0,0.0,0.0
1,01777V885181,2017-01-01,2017-01-01,01:00:00,02:00:00,SWE,3.0,0.0,0.0
2,01777V885181,2017-01-01,2017-01-01,02:00:00,03:00:00,NOR,1.0,1.0,1.0
3,01777V885181,2017-01-01,2017-01-01,02:00:00,03:00:00,SWE,0.0,1.0,1.0
4,01777V885181,2017-01-01,2017-01-01,03:00:00,04:00:00,NOR,0.0,0.0,0.0


# NorSwe aggregated data

In [None]:
from pathlib import Path

# Pickled aggregate table for the NOR/SWE sensors (relative path).
# Only unpickle files you produced yourself: read_pickle can execute
# arbitrary code embedded in the file.
agg_data_nor_path = Path("../data/NorSwe_GMM/agg_data_nor.pkl")
agg_data_nor = pd.read_pickle(agg_data_nor_path)

# Rich preview of the first five rows.
display(agg_data_nor.iloc[:5])

# Dimensions as (rows, columns).
print("Shape of agg_data_nor:", agg_data_nor.shape)

# First ten column labels, printed on the line after the caption.
print("Column names sample:", agg_data_nor.columns[:10], sep="\n")


Unnamed: 0_level_0,date,2017-01-01 00:00:00,2017-01-01 01:00:00,2017-01-01 02:00:00,2017-01-01 03:00:00,2017-01-01 04:00:00,2017-01-01 05:00:00,2017-01-01 06:00:00,2017-01-01 07:00:00,2017-01-01 08:00:00,2017-01-01 09:00:00,...,2023-12-30 15:00:00,2023-12-30 16:00:00,2023-12-30 17:00:00,2023-12-30 18:00:00,2023-12-30 19:00:00,2023-12-30 20:00:00,2023-12-30 21:00:00,2023-12-30 22:00:00,2023-12-30 23:00:00,2023-12-31 00:00:00
sensor_origin,sensor_destination,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
"01777V885181, NOR","01777V885181, SWE",0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,4.0,...,35.0,16.0,19.0,8.0,10.0,1.0,1.0,1.0,1.0,0.0
"01777V885181, SWE","01777V885181, NOR",0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,...,30.0,22.0,18.0,12.0,9.0,8.0,1.0,0.0,3.0,0.0
"77275V885276, NOR","77275V885276, SWE",0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,...,7.0,4.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
"77275V885276, SWE","77275V885276, NOR",0.0,1.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,10.0,11.0,15.0,6.0,4.0,4.0,1.0,1.0,0.0,0.0
"35829V885266, NOR","35829V885266, SWE",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Shape of agg_data_nor: (38, 61321)
Column names sample:
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 01:00:00',
               '2017-01-01 02:00:00', '2017-01-01 03:00:00',
               '2017-01-01 04:00:00', '2017-01-01 05:00:00',
               '2017-01-01 06:00:00', '2017-01-01 07:00:00',
               '2017-01-01 08:00:00', '2017-01-01 09:00:00'],
              dtype='datetime64[ns]', name='date', freq=None)


# FinSwe data visualisation

In [None]:
from pathlib import Path

# TMS point IDs of the sensors to load; each ID is used to build one CSV
# file name in the loading cell.
Sensors = [
    "1433",
    "1432",
    "1435",
    "1436",
    "1431",
]

# Folder containing the FIN-SWE per-sensor CSV files (relative path;
# adjust it if the data lives elsewhere).
folder_path = Path("../data/FIN-SWE")

In [None]:
# Read one per-minute CSV per sensor and collect the frames in a list.
# Concatenating once at the end avoids re-copying the accumulated data on
# every iteration (quadratic cost) and avoids concatenating with an empty
# placeholder DataFrame.
sensor_frames = []
for s in Sensors:
    # File name pattern: "<sensor>_by_length_minute.csv" inside folder_path.
    sensor_data = pd.read_csv(
        folder_path / f"{s}_by_length_minute.csv",
        sep=',',
        usecols=['TMS point id', 'year', 'days', 'hour', 'minute', 'v_type', 'direction', 'total_vehicles', 'date'],
        parse_dates=['date']
    )
    sensor_frames.append(sensor_data)

# Combined data from all sensors. The original row index of each file is kept
# (no ignore_index), so index values repeat across sensors. With no sensors
# the result is an empty DataFrame rather than an error from pd.concat.
all_data = pd.concat(sensor_frames) if sensor_frames else pd.DataFrame()

# Select vehicle type for analysis: options are 'Small', 'Heavy', 'Total', or 'Both'
WhichVehicles = 'Small'

# Work on a copy so later processing does not modify the loaded data.
data = all_data.copy()
print(data.head())


   TMS point id  year  days  hour  minute v_type  direction  total_vehicles  \
0          1433    17     1     0       2  <5.6m          2               1   
1          1433    17     1     0       3  <5.6m          1               1   
2          1433    17     1     0       6  <5.6m          2               2   
3          1433    17     1     0      18  <5.6m          1               1   
4          1433    17     1     0      20  <5.6m          1               1   

        date  
0 2017-01-01  
1 2017-01-01  
2 2017-01-01  
3 2017-01-01  
4 2017-01-01  


# FinSwe aggregated data

In [None]:
from pathlib import Path

# Pickled aggregate table for the FIN/SWE sensors (relative path).
# Only unpickle files you produced yourself: read_pickle can execute
# arbitrary code embedded in the file.
agg_data_fin_path = Path("../data/NorSwe_GMM/agg_data_fin.pkl")
agg_data_fin = pd.read_pickle(agg_data_fin_path)

# Rich preview of the first five rows.
display(agg_data_fin.iloc[:5])

# Dimensions as (rows, columns).
print("Shape of agg_data_fin:", agg_data_fin.shape)

# First ten column labels, printed on the line after the caption.
print("Column names sample:", agg_data_fin.columns[:10], sep="\n")

Unnamed: 0_level_0,date,2017-01-01 00:01:00,2017-01-01 00:02:00,2017-01-01 00:03:00,2017-01-01 00:04:00,2017-01-01 00:05:00,2017-01-01 00:06:00,2017-01-01 00:07:00,2017-01-01 00:08:00,2017-01-01 00:09:00,2017-01-01 00:10:00,...,2023-12-30 23:50:00,2023-12-30 23:51:00,2023-12-30 23:52:00,2023-12-30 23:53:00,2023-12-30 23:54:00,2023-12-30 23:55:00,2023-12-30 23:56:00,2023-12-30 23:57:00,2023-12-30 23:58:00,2023-12-30 23:59:00
sensor_origin,sensor_destination,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
"1431, FIN","1431, SWE",7.0,2.0,2.0,1.0,3.0,5.0,12.0,4.0,7.0,5.0,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
"1431, SWE","1431, FIN",2.0,0.0,2.0,0.0,3.0,11.0,14.0,11.0,8.0,10.0,...,1.0,3.0,2.0,1.0,2.0,1.0,1.0,0.0,0.0,2.0
"1432, FIN","1432, SWE",0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"1432, SWE","1432, FIN",1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
"1433, FIN","1433, SWE",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Shape of agg_data_nor: (10, 3679199)
Column names sample:
DatetimeIndex(['2017-01-01 00:01:00', '2017-01-01 00:02:00',
               '2017-01-01 00:03:00', '2017-01-01 00:04:00',
               '2017-01-01 00:05:00', '2017-01-01 00:06:00',
               '2017-01-01 00:07:00', '2017-01-01 00:08:00',
               '2017-01-01 00:09:00', '2017-01-01 00:10:00'],
              dtype='datetime64[ns]', name='date', freq=None)


# Sweden-Denmark Aggregated Data

In [None]:
from pathlib import Path

# Pickled aggregate table for the DEN/SWE sensor (relative path).
# Only unpickle files you produced yourself: read_pickle can execute
# arbitrary code embedded in the file.
agg_data_den_path = Path("../data/NorSwe_GMM/agg_data_den.pkl")
agg_data_den = pd.read_pickle(agg_data_den_path)
# Show the first few rows
display(agg_data_den.head())

# Show the shape (rows x columns)
print("Shape of agg_data_den:", agg_data_den.shape)

# Show the first ten column labels of the Denmark table. The previous version
# printed agg_data_fin.columns here, i.e. the columns of a different dataset.
print("Column names sample:")
print(agg_data_den.columns[:10])

Unnamed: 0_level_0,date,2019-01-01 00:01:00,2019-01-01 00:02:00,2019-01-01 00:03:00,2019-01-01 00:04:00,2019-01-01 00:05:00,2019-01-01 00:06:00,2019-01-01 00:07:00,2019-01-01 00:08:00,2019-01-01 00:09:00,2019-01-01 00:10:00,...,2022-03-06 23:50:00,2022-03-06 23:51:00,2022-03-06 23:52:00,2022-03-06 23:53:00,2022-03-06 23:54:00,2022-03-06 23:55:00,2022-03-06 23:56:00,2022-03-06 23:57:00,2022-03-06 23:58:00,2022-03-06 23:59:00
sensor_origin,sensor_destination,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
"53VIP0008, DEN","53VIP0008, SWE",4,8,5,3,3,1,2,6,3,4,...,0,3,0,1,0,0,2,2,0,0
"53VIP0008, SWE","53VIP0008, DEN",0,1,0,0,2,2,1,1,1,1,...,1,3,0,1,0,0,2,0,0,0


Shape of agg_data_nor: (2, 1671839)
Column names sample:
DatetimeIndex(['2019-01-01 00:01:00', '2019-01-01 00:02:00',
               '2019-01-01 00:03:00', '2019-01-01 00:04:00',
               '2019-01-01 00:05:00', '2019-01-01 00:06:00',
               '2019-01-01 00:07:00', '2019-01-01 00:08:00',
               '2019-01-01 00:09:00', '2019-01-01 00:10:00'],
              dtype='datetime64[ns]', name='date', freq=None)
