In [1]:
# ingestion/fetch_queue_events.py

import requests
import json
import os
from dotenv import load_dotenv
import pandas as pd

In [2]:
# Load variables from a .env file into the process environment, then read the
# Trafikverket API key from there (None when the variable is not set).
load_dotenv()
API_KEY = os.environ.get("TRAFIKVERKET_API_KEY")

In [3]:
def fetch_queue_events(timeout=30):
    """Fetch TrafficFlow measurements from the Trafikverket data API.

    Posts an XML query for the ``TrafficFlow`` object type (namespace
    ``Road.TrafficInfo``, schema version 1.5) authenticated with the
    module-level ``API_KEY`` and returns the records of the first result set.

    Args:
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.post``. Without it a stalled connection would block
            forever.

    Returns:
        The ``TrafficFlow`` entries from the JSON response. An empty list is
        returned when the API key is missing, when the server answers with a
        non-200 status, or when the response carries no ``TrafficFlow``
        entries.

    Raises:
        requests.RequestException: On connection failures or when the
            timeout expires.
    """
    if not API_KEY:
        # Otherwise the literal text "None" would be sent as the key and the
        # only feedback would be an opaque HTTP error code.
        print("Error: TRAFIKVERKET_API_KEY is not set")
        return []

    url = "https://api.trafikinfo.trafikverket.se/v2/data.json"

    xml_query = f"""
    <REQUEST>
      <LOGIN authenticationkey="{API_KEY}"/>
      <QUERY objecttype="TrafficFlow" namespace="Road.TrafficInfo" schemaversion="1.5">
        <INCLUDE>AverageVehicleSpeed</INCLUDE>
        <INCLUDE>VehicleFlowRate</INCLUDE>
        <INCLUDE>MeasurementTime</INCLUDE>
        <INCLUDE>Geometry</INCLUDE>
        <INCLUDE>VehicleType</INCLUDE>
        <INCLUDE>CountyNo</INCLUDE>
        <INCLUDE>RegionId</INCLUDE>
        <INCLUDE>SiteId</INCLUDE>
      </QUERY>
    </REQUEST>
    """

    response = requests.post(
        url,
        data=xml_query.encode('utf-8'),
        headers={"Content-Type": "text/xml"},
        timeout=timeout,
    )

    if response.status_code != 200:
        print("Error:", response.status_code)
        return []

    # Walk the payload defensively: a query that matches nothing must yield an
    # empty list rather than a KeyError/IndexError.
    results = response.json().get('RESPONSE', {}).get('RESULT') or [{}]
    return results[0].get('TrafficFlow') or []

In [4]:
if __name__ == "__main__":
    # Fetch once and keep the result in `data`; the DataFrame cell below reads it.
    data = fetch_queue_events()

In [21]:
# Load the fetched records into a DataFrame for inspection.
df = pd.DataFrame(data)

In [22]:
# Count missing values per column.
df.isna().sum()

SiteId                 0
MeasurementTime        0
VehicleType            0
VehicleFlowRate        0
AverageVehicleSpeed    0
CountyNo               0
Geometry               0
RegionId               0
dtype: int64

In [6]:
df.shape  # (rows, columns)

(3534, 8)

In [7]:
df.head(5)  # Preview first five entries

Unnamed: 0,SiteId,MeasurementTime,VehicleType,VehicleFlowRate,AverageVehicleSpeed,CountyNo,Geometry,RegionId
0,40,2025-07-29T14:04:00.000+02:00,anyVehicle,1200,68.6,1,"{'SWEREF99TM': 'POINT (677754.96 6578623.19)',...",4
1,4306,2025-07-29T14:04:00.000+02:00,anyVehicle,960,64.15,1,"{'SWEREF99TM': 'POINT (674788.02 6583302.84)',...",4
2,861,2025-07-29T14:04:00.000+02:00,anyVehicle,780,89.64,1,"{'SWEREF99TM': 'POINT (651018.96 6563771.14)',...",4
3,1471,2025-07-29T14:04:00.000+02:00,anyVehicle,1080,79.74,1,"{'SWEREF99TM': 'POINT (652805.94 6565755.17)',...",4
4,174,2025-07-29T14:04:00.000+02:00,anyVehicle,720,83.82,1,"{'SWEREF99TM': 'POINT (656154.96 6566753.11)',...",4


In [9]:
# Number of distinct SiteId values.
df['SiteId'].nunique()

3228

In [8]:
# Row count per SiteId, most frequent first.
df['SiteId'].value_counts()

SiteId
3200    7
3198    7
3197    7
3196    7
3199    7
       ..
387     1
1116    1
780     1
850     1
860     1
Name: count, Length: 3228, dtype: int64

In [15]:
# Every row whose SiteId is 3196.
df.loc[df['SiteId'] == 3196]

Unnamed: 0,SiteId,MeasurementTime,VehicleType,VehicleFlowRate,AverageVehicleSpeed,CountyNo,Geometry,RegionId
3429,3196,2025-07-29T14:04:00.000+02:00,car,420,75.5,14,"{'SWEREF99TM': 'POINT (322746 6392295.06)', 'W...",5
3430,3196,2025-07-29T14:04:00.000+02:00,anyVehicle,540,75.47,14,"{'SWEREF99TM': 'POINT (322746 6392295.06)', 'W...",5
3436,3196,2025-07-29T14:02:00.000+02:00,trailer,120,69.4,14,"{'SWEREF99TM': 'POINT (322746 6392295.06)', 'W...",5
3437,3196,2025-07-29T14:04:00.000+02:00,bus,60,75.9,14,"{'SWEREF99TM': 'POINT (322746 6392295.06)', 'W...",5
3440,3196,2025-07-29T14:01:00.000+02:00,carWithTrailer,60,73.0,14,"{'SWEREF99TM': 'POINT (322746 6392295.06)', 'W...",5
3442,3196,2025-07-29T14:04:00.000+02:00,lorry,60,74.8,14,"{'SWEREF99TM': 'POINT (322746 6392295.06)', 'W...",5
3447,3196,2025-07-29T13:07:00.000+02:00,other,60,51.1,14,"{'SWEREF99TM': 'POINT (322746 6392295.06)', 'W...",5


In [20]:
# Row count per CountyNo.
df['CountyNo'].value_counts()

CountyNo
1     2455
14     940
12     139
Name: count, dtype: int64

In [None]:
df.shape  # (rows, columns) before the coordinate columns are added below

In [None]:
# Copy the 'WGS84' entry of each Geometry dictionary into its own column.
df['wgs84'] = df['Geometry'].map(lambda geom: geom['WGS84'])

In [None]:
# Split "POINT (lon lat)" into its two components; the captured values are
# still strings at this point.
df[['longitude', 'latitude']] = (
    df['wgs84']
    .str.extract(r'POINT \(([-\d.]+) ([-\d.]+)\)', expand=True)
)

In [None]:
# Cast both coordinate columns to float in one step.
df[['longitude', 'latitude']] = df[['longitude', 'latitude']].astype(float)

In [None]:
df.head()  # Preview the first five rows (head defaults to 5)

In [None]:
# Number of distinct MeasurementTime values.
df['MeasurementTime'].nunique()

In [None]:
# Number of distinct latitude values.
df['latitude'].nunique()

In [None]:
# Row count per MeasurementTime value.
df['MeasurementTime'].value_counts()

In [None]:
# Row count per longitude value.
df['longitude'].value_counts()

In [None]:
print(df.iloc[:2])  # Plain-text preview of the first two rows
