In [None]:

import requests
import pandas as pd
from datetime import datetime, timedelta
import json

 

API_KEY = "cc099cfca8464b55b73357eec92da761"

def run_query(query):
  try:
      response = requests.post(
          url="https://api.trafikinfo.trafikverket.se/v2/data.json",
          data=query.encode("utf-8"),
          headers={"Content-Type": "text/xml"},
      )

      # Check for HTTP errors
      response.raise_for_status()

      # Try parsing JSON
      try:
          data = response.json()
      except json.JSONDecodeError as e:
          print("❌ Failed to parse JSON response:")
          print(response.text)
          raise e

      # Check for API-level errors in the response
      if "RESPONSE" in data and "RESULT" in data["RESPONSE"]:
          result = data["RESPONSE"]["RESULT"][0]
          if "ERROR" in result:
              print("⚠️ API Error:")
              print(json.dumps(result["ERROR"], indent=2))
          else:
              print("✅ Success")
      else:
          print("⚠️ Unexpected response format:")
          print(json.dumps(data, indent=2))

  except requests.exceptions.RequestException as e:
      print("🚨 HTTP Request failed:")
      print(e)
  return response

def fetch_stations():
    query = f"""
    <REQUEST>
        <LOGIN authenticationkey='{API_KEY}' />
          <QUERY  objecttype="TrainStation" namespace="rail.infrastructure" schemaversion="1.5">
            <FILTER>
    
            </FILTER>
            <EXCLUDE>LocationInformationText</EXCLUDE>
            <EXCLUDE>ModifiedTime</EXCLUDE>
            <EXCLUDE>PlatformLine</EXCLUDE>
            
          </QUERY>
    </REQUEST>
    """

    print(query)
    response = run_query(query)


    data = response.json()


    data = response.json()["RESPONSE"]["RESULT"][0]["TrainStation"]
    df = pd.json_normalize(data)


    return df

def fetch_departures(last_AdvertisedTimeAtLocation=None):
    now = datetime.now()
    now_str = now.strftime("%Y-%m-%dT%H:%M:%S")
    one_hour_later = (now + timedelta(hours=1)).strftime("%Y-%m-%dT%H:%M:%S")

    print (now_str)

    from_time = '2025-04-01T00:00:00'
                  
    to_time = '2025-05-01T00:00:00'

    query = f"""
    <REQUEST>
      <LOGIN authenticationkey='{API_KEY}' />
      <QUERY objecttype='TrainAnnouncement' schemaversion="1.9" limit='10000'>  
        <FILTER>
          <AND>
            <EQ name='Advertised' value='true'  />
            <GTE name ='AdvertisedTimeAtLocation' value = '+{last_AdvertisedTimeAtLocation}+' />

            
            
            
            <EQ name='ActivityType' value='Avgang' />
          </AND>
        </FILTER>
        <EXCLUDE>WebLink</EXCLUDE>
        <EXCLUDE>WebLinkName</EXCLUDE>
        <EXCLUDE>OperationalTransportIdentifiers</EXCLUDE>
        <EXCLUDE>OtherInformation</EXCLUDE>
        <EXCLUDE>ViaToLocation</EXCLUDE>
        <EXCLUDE>MobileWebLink</EXCLUDE>
        <EXCLUDE>TimeAtLocationWithSeconds</EXCLUDE>
        <EXCLUDE>TypeOfTraffic</EXCLUDE>
        <EXCLUDE>ProductInformation</EXCLUDE>

        


      </QUERY>
    </REQUEST>
    """

   
    response = run_query(query)


    data = response.json()


    data = response.json()["RESPONSE"]["RESULT"][0]["TrainAnnouncement"]
    df = pd.json_normalize(data)


    return df



In [10]:

df_raw = fetch_departures()


2025-05-27T09:24:08
⚠️ API Error:
{
  "SOURCE": "ResponseStreamer",
  "MESSAGE": "Maximum response size is reached. The result is not complete."
}


In [11]:
 

import pandas as pd

df = pd.DataFrame(df_raw)
print(df.head())


# Create data/raw folder if needed
Path("../data/raw").mkdir(parents=True, exist_ok=True)

 
# Convert time columns to datetime
df['AdvertisedTimeAtLocation'] = pd.to_datetime(df['AdvertisedTimeAtLocation'])
df['TimeAtLocation'] = pd.to_datetime(df['TimeAtLocation'])

# Create 'Hour' column (rounded to the nearest hour)
df['Hour'] = df['AdvertisedTimeAtLocation'].dt.floor('h')
df['Hour'] = df['Hour'].dt.hour

# Calculate 'DelayMinutes'
df['DelayMinutes'] = ((df['TimeAtLocation'] - df['AdvertisedTimeAtLocation']).dt.total_seconds() / 60).fillna(0).round().astype(int)




# Save CSV
df.to_csv("../data/raw/train_departures.csv", index=False)

                             ActivityId ActivityType  Advertised  \
0  1500adde-385d-31b6-08dd-903eab6b3e79       Avgang        True   
1  1500adde-385d-31b6-08dd-903ec887c620       Avgang        True   
2  1500adde-385d-31b6-08dd-903ec43fcb52       Avgang        True   
3  1500adde-385d-31b6-08dd-903e4bd58ac3       Avgang        True   
4  1500adde-385d-31b6-08dd-903eb9d748cf       Avgang        True   

        AdvertisedTimeAtLocation AdvertisedTrainIdent  Canceled  Deleted  \
0  2025-05-26T00:00:00.000+02:00                 2983     False    False   
1  2025-05-26T00:03:00.000+02:00                63591     False    False   
2  2025-05-26T00:03:00.000+02:00                52277     False    False   
3  2025-05-26T00:04:00.000+02:00                 1243      True    False   
4  2025-05-26T00:04:00.000+02:00                 3594     False    False   

                DepartureDateOTN  \
0  2025-05-26T00:00:00.000+02:00   
1  0001-01-01T00:00:00.000+01:00   
2  2025-05-26T00:00:00.000

In [12]:
 


df_raw = fetch_stations()

import pandas as pd

df = pd.DataFrame(df_raw)
print(df.head())


# Create data/raw folder if needed
Path("../data/raw").mkdir(parents=True, exist_ok=True)

# Save CSV
df.to_csv("../data/raw/stations.csv", index=False)


    <REQUEST>
        <LOGIN authenticationkey='cc099cfca8464b55b73357eec92da761' />
          <QUERY  objecttype="TrainStation" namespace="rail.infrastructure" schemaversion="1.5">
            <FILTER>
    
            </FILTER>
            <EXCLUDE>LocationInformationText</EXCLUDE>
            <EXCLUDE>ModifiedTime</EXCLUDE>
            <EXCLUDE>PlatformLine</EXCLUDE>
            
          </QUERY>
    </REQUEST>
    
✅ Success
   Advertised AdvertisedLocationName AdvertisedShortLocationName  \
0        True               Alingsås                    Alingsås   
1        True               Anneberg                    Anneberg   
2        True                  Aneby                       Aneby   
3        True                  Aspen                       Aspen   
4        True                 Arvika                      Arvika   

  PrimaryLocationCode CountryCode CountyNo  Deleted LocationSignature  \
0                 119          SE     [14]    False                 A   
1        