In [1]:
# ingestion/fetch_queue_events.py

import requests
import json
import os
from dotenv import load_dotenv
import pandas as pd

In [2]:
# Let python-dotenv populate the process environment, then look up the API key
# (None when the variable is not set).
load_dotenv()
API_KEY = os.environ.get("TRAFIKVERKET_API_KEY")

In [3]:
def fetch_queue_events(timeout=30):
    """Fetch TrafficFlow records from the Trafikverket open data API.

    Despite the function name, the query targets the ``TrafficFlow`` object
    type (namespace ``Road.TrafficInfo``, schema version 1.5) and asks for
    speed, flow rate, measurement time, geometry, vehicle type, county and
    region fields.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.
            Defaults to 30 so a stalled connection cannot hang the caller.

    Returns:
        The list found under ``RESPONSE -> RESULT[0] -> TrafficFlow`` in the
        JSON reply. An empty list is returned (after printing an error) when
        the API key is missing, the request fails, the status code is not
        200, or the payload does not have the expected structure.
    """
    # Local import keeps this block self-contained; quoteattr escapes the key
    # and wraps it in quotes so it is always a well-formed XML attribute.
    from xml.sax.saxutils import quoteattr

    url = "https://api.trafikinfo.trafikverket.se/v2/data.json"

    if not API_KEY:
        # Without this guard the literal string "None" would be sent as the key.
        print("Error: TRAFIKVERKET_API_KEY is not set")
        return []

    xml_query = f"""
    <REQUEST>
      <LOGIN authenticationkey={quoteattr(API_KEY)}/>
      <QUERY objecttype="TrafficFlow" namespace="Road.TrafficInfo" schemaversion="1.5">
        <INCLUDE>AverageVehicleSpeed</INCLUDE>
        <INCLUDE>VehicleFlowRate</INCLUDE>
        <INCLUDE>MeasurementTime</INCLUDE>
        <INCLUDE>Geometry</INCLUDE>
        <INCLUDE>VehicleType</INCLUDE>
        <INCLUDE>CountyNo</INCLUDE>
        <INCLUDE>RegionId</INCLUDE>
      </QUERY>
    </REQUEST>
    """

    try:
        response = requests.post(
            url,
            data=xml_query.encode('utf-8'),
            headers={"Content-Type": "text/xml"},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same "report and return []"
        # contract as a non-200 status.
        print("Error:", exc)
        return []

    if response.status_code != 200:
        print("Error:", response.status_code)
        return []

    try:
        data = response.json()
        # A result set with no matches may omit the TrafficFlow key entirely.
        return data['RESPONSE']['RESULT'][0].get('TrafficFlow', [])
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as exc:
        # ValueError covers a body that is not valid JSON; the rest cover a
        # payload whose shape differs from RESPONSE/RESULT[0].
        print("Error: unexpected response payload:", exc)
        return []

In [4]:
if __name__ == "__main__":
    # Run the fetch and keep the returned records in `data`.
    data = fetch_queue_events()
    # print(data[:2])  # uncomment to preview the first two entries

In [5]:
# Tabulate the fetched records; `data` is only defined once the
# `__main__`-guarded fetch cell has run.
df = pd.DataFrame(data)


In [6]:
df.head(5) # Preview the first five rows

Unnamed: 0,MeasurementTime,VehicleType,VehicleFlowRate,AverageVehicleSpeed,CountyNo,Geometry,RegionId
0,2025-07-29T11:10:00.000+02:00,anyVehicle,720,72.25,1,"{'SWEREF99TM': 'POINT (677754.96 6578623.19)',...",4
1,2025-07-29T11:10:00.000+02:00,anyVehicle,900,64.25,1,"{'SWEREF99TM': 'POINT (674788.02 6583302.84)',...",4
2,2025-07-29T11:10:00.000+02:00,anyVehicle,1620,52.8,1,"{'SWEREF99TM': 'POINT (651018.96 6563771.14)',...",4
3,2025-07-28T06:37:00.000+02:00,anyVehicle,660,83.72,1,"{'SWEREF99TM': 'POINT (652805.94 6565755.17)',...",4
4,2025-07-29T11:10:00.000+02:00,anyVehicle,780,88.5,1,"{'SWEREF99TM': 'POINT (656154.96 6566753.11)',...",4


In [9]:
# Geometry cells are dicts, which are unhashable, so counting them directly
# raises TypeError; count their string representation instead.
df.Geometry.astype(str).value_counts()

Geometry
{'SWEREF99TM': 'POINT (382476.02 6173595.84)', 'WGS84': 'POINT (13.13026 55.693893)'}     14
{'SWEREF99TM': 'POINT (380424 6170153.19)', 'WGS84': 'POINT (13.099134 55.662476)'}       14
{'SWEREF99TM': 'POINT (380458.01 6167553.91)', 'WGS84': 'POINT (13.100805 55.63914)'}     14
{'SWEREF99TM': 'POINT (380394.99 6167516.14)', 'WGS84': 'POINT (13.099821 55.638786)'}    14
{'SWEREF99TM': 'POINT (382872 6173647.2)', 'WGS84': 'POINT (13.136535 55.69445)'}         14
                                                                                          ..
{'SWEREF99TM': 'POINT (653491.99 6566150.19)', 'WGS84': 'POINT (17.688229 59.20659)'}      1
{'SWEREF99TM': 'POINT (668797.04 6588574.13)', 'WGS84': 'POINT (17.973394 59.40188)'}      1
{'SWEREF99TM': 'POINT (663589 6571926.87)', 'WGS84': 'POINT (17.869167 59.254627)'}        1
{'SWEREF99TM': 'POINT (671458.99 6585555.17)', 'WGS84': 'POINT (18.017801 59.37373)'}      1
{'SWEREF99TM': 'POINT (682763.02 6579526.02)', 'WGS84': 'POIN

In [11]:
# Dimensions of the frame as (row count, column count).
df.shape

(3533, 7)

In [12]:
# Each Geometry cell is a dict; keep only the value stored under its 'WGS84' key.
df['wgs84'] = df['Geometry'].map(lambda geometry: geometry['WGS84'])

In [13]:
# Capture the two numbers inside "POINT (lon lat)": first group is longitude,
# second is latitude. Both are still strings at this point.
df[['longitude', 'latitude']] = df['wgs84'].str.extract(
    pat=r'POINT \(([-\d.]+) ([-\d.]+)\)',
    expand=True,
)

In [14]:
# Turn the extracted coordinate strings into floats, one column at a time.
for coord_col in ('longitude', 'latitude'):
    df[coord_col] = df[coord_col].astype(float)

In [15]:
df.head()  # Preview first five entries (head() defaults to n=5)

Unnamed: 0,MeasurementTime,VehicleType,VehicleFlowRate,AverageVehicleSpeed,CountyNo,Geometry,RegionId,wgs84,longitude,latitude
0,2025-07-29T11:10:00.000+02:00,anyVehicle,720,72.25,1,"{'SWEREF99TM': 'POINT (677754.96 6578623.19)',...",4,POINT (18.122711 59.308964),18.122711,59.308964
1,2025-07-29T11:10:00.000+02:00,anyVehicle,900,64.25,1,"{'SWEREF99TM': 'POINT (674788.02 6583302.84)',...",4,POINT (18.074469 59.352165),18.074469,59.352165
2,2025-07-29T11:10:00.000+02:00,anyVehicle,1620,52.8,1,"{'SWEREF99TM': 'POINT (651018.96 6563771.14)',...",4,POINT (17.64332 59.186134),17.64332,59.186134
3,2025-07-28T06:37:00.000+02:00,anyVehicle,660,83.72,1,"{'SWEREF99TM': 'POINT (652805.94 6565755.17)',...",4,POINT (17.675951 59.203293),17.675951,59.203293
4,2025-07-29T11:10:00.000+02:00,anyVehicle,780,88.5,1,"{'SWEREF99TM': 'POINT (656154.96 6566753.11)',...",4,POINT (17.735239 59.211025),17.735239,59.211025


In [16]:
# Number of distinct longitude values.
df['longitude'].nunique()

3048

In [17]:
# Number of distinct latitude values.
df['latitude'].nunique()

2848

In [18]:
# How many rows share each latitude value, most frequent first.
df['latitude'].value_counts()

latitude
55.693893    14
55.694450    14
55.662476    14
57.564926    14
55.639140    14
             ..
59.366386     1
59.373688     1
59.221867     1
59.423447     1
59.301210     1
Name: count, Length: 2848, dtype: int64

In [19]:
# How many rows share each longitude value, most frequent first.
df['longitude'].value_counts()

longitude
13.099821    14
13.136535    14
13.099134    14
13.100805    14
13.130260    14
             ..
18.017801     1
18.014965     1
18.047836     1
17.925161     1
18.047848     1
Name: count, Length: 3048, dtype: int64

In [20]:
print(df.iloc[:2])  # Plain-text preview of the first two rows


                 MeasurementTime VehicleType  VehicleFlowRate  \
0  2025-07-29T11:10:00.000+02:00  anyVehicle              720   
1  2025-07-29T11:10:00.000+02:00  anyVehicle              900   

   AverageVehicleSpeed  CountyNo  \
0                72.25         1   
1                64.25         1   

                                            Geometry  RegionId  \
0  {'SWEREF99TM': 'POINT (677754.96 6578623.19)',...         4   
1  {'SWEREF99TM': 'POINT (674788.02 6583302.84)',...         4   

                         wgs84  longitude   latitude  
0  POINT (18.122711 59.308964)  18.122711  59.308964  
1  POINT (18.074469 59.352165)  18.074469  59.352165  
