# Edellisen viikon junatiedot

In [None]:
import datetime as dt
from pathlib import Path

import pandas as pd
import requests
from requests.exceptions import HTTPError

# Work out the previous calendar week (Monday..Sunday) relative to today.
# date.weekday() is 0 for Monday, so subtracting it lands on this week's Monday.
today = dt.date.today()
first_day_of_current_week = today - dt.timedelta(days=today.weekday())
first_day_of_previous_week = first_day_of_current_week - dt.timedelta(weeks=1)
last_day_of_previous_week = first_day_of_current_week - dt.timedelta(days=1)


base_api_url = 'https://rata.digitraffic.fi/api/v1/trains/'
json_list = []

# (connect, read) timeouts in seconds. Without a timeout requests waits forever
# on a stalled connection, which would hang the whole notebook run.
request_timeout = (10, 60)

try:
    # One request per day; the date is appended to the base URL as YYYY-MM-DD.
    for date in pd.date_range(first_day_of_previous_week, last_day_of_previous_week):
        response = requests.get(base_api_url + date.strftime('%Y-%m-%d'), timeout=request_timeout)
        response.raise_for_status()
        trains_json = response.json()
        if isinstance(trains_json, list):
            json_list.extend(trains_json)
        else:
            # A non-list payload is kept as a single entry.
            # NOTE(review): presumably this is an API error object rather than a
            # train record - confirm against the API documentation.
            json_list.append(trains_json)

    # Format the dates outside the f-string: reusing the same quote character
    # inside an f-string expression is a SyntaxError before Python 3.12.
    period_start = first_day_of_previous_week.strftime('%d.%m.%Y')
    period_end = last_day_of_previous_week.strftime('%d.%m.%Y')
    print(f'Data noudettu onnistuneesti {period_start} - {period_end} väliseltä ajalta.')
except HTTPError as http_err:
    print(f'Datan noutamisessa tapahtui virhe: {http_err}')
    # Re-raise so later cells do not flatten and save a partially fetched week.
    raise
except Exception as err:
    print(f'Tapahtui odottamaton virhe: {err}')
    # Same reasoning as above: json_list may be incomplete at this point.
    raise

Data noudettu onnistuneesti 01.02.2026 - 22.02.2026 väliseltä ajalta.


In [3]:
# Peek at the first fetched record to see the raw, nested structure of one train.
json_list[0]

{'trainNumber': 1,
 'departureDate': '2026-02-01',
 'operatorUICCode': 10,
 'operatorShortCode': 'vr',
 'trainType': 'S',
 'trainCategory': 'Long-distance',
 'commuterLineID': '',
 'runningCurrently': False,
 'cancelled': False,
 'version': 293249395548,
 'timetableType': 'REGULAR',
 'timetableAcceptanceDate': '2025-10-30T06:30:01.000Z',
 'timeTableRows': [{'stationShortCode': 'HKI',
   'stationUICCode': 1,
   'countryCode': 'FI',
   'type': 'DEPARTURE',
   'trainStopping': True,
   'commercialStop': True,
   'cancelled': False,
   'scheduledTime': '2026-02-01T04:54:00.000Z',
   'actualTime': '2026-02-01T04:56:35.000Z',
   'differenceInMinutes': 3,
   'causes': [],
   'commercialTrack': '10',
   'trainReady': {'source': 'KUPLA',
    'accepted': True,
    'timestamp': '2026-02-01T04:52:43.000Z'}},
  {'stationShortCode': 'PSL',
   'stationUICCode': 10,
   'countryCode': 'FI',
   'type': 'ARRIVAL',
   'trainStopping': True,
   'commercialStop': True,
   'cancelled': False,
   'scheduledTi

In [4]:
# Flatten the nested JSON: one output row per entry in each train's
# 'timeTableRows' list, with the train-level fields listed in `meta`
# repeated on every row of that train.
trains = pd.json_normalize(
    json_list,
    record_path=['timeTableRows'],
    meta=[
        'trainNumber',
        'departureDate',
        'operatorUICCode',
        'operatorShortCode',
        'trainType',
        'trainCategory',
        'commuterLineID',
    ],
)

# Columns that are not needed downstream.
unused_columns = [
    'stationUICCode',
    'countryCode',
    'stopSector',
    'commercialTrack',
    'trainReady.source',
    'trainReady.accepted',
    'trainReady.timestamp',
    'estimateSource',
    'liveEstimateTime',
    'unknownTrack',
    'unknownDelay',
    'causes',
]

# json_normalize only creates a column when at least one record contains that
# key, so optional fields can be absent for a given week. errors='ignore'
# keeps the drop from raising KeyError in that case.
trains = trains.drop(columns=unused_columns, errors='ignore')

trains.head()

Unnamed: 0,stationShortCode,type,trainStopping,commercialStop,cancelled,scheduledTime,actualTime,differenceInMinutes,trainNumber,departureDate,operatorUICCode,operatorShortCode,trainType,trainCategory,commuterLineID
0,HKI,DEPARTURE,True,True,False,2026-02-01T04:54:00.000Z,2026-02-01T04:56:35.000Z,3.0,1,2026-02-01,10,vr,S,Long-distance,
1,PSL,ARRIVAL,True,True,False,2026-02-01T04:59:00.000Z,2026-02-01T05:01:20.000Z,2.0,1,2026-02-01,10,vr,S,Long-distance,
2,PSL,DEPARTURE,True,True,False,2026-02-01T05:00:00.000Z,2026-02-01T05:02:53.000Z,3.0,1,2026-02-01,10,vr,S,Long-distance,
3,LOP,ARRIVAL,False,,False,2026-02-01T05:00:53.000Z,,3.0,1,2026-02-01,10,vr,S,Long-distance,
4,LOP,DEPARTURE,False,,False,2026-02-01T05:00:53.000Z,,3.0,1,2026-02-01,10,vr,S,Long-distance,


In [5]:
# Show the last rows of the flattened frame as a quick check of the end of the data.
trains.tail()

Unnamed: 0,stationShortCode,type,trainStopping,commercialStop,cancelled,scheduledTime,actualTime,differenceInMinutes,trainNumber,departureDate,operatorUICCode,operatorShortCode,trainType,trainCategory,commuterLineID
1564783,VRJ,ARRIVAL,False,,False,2026-02-22T01:11:27.000Z,,2.0,76042,2026-02-22,9999,winco,TYO,On-track machines,
1564784,VRJ,DEPARTURE,False,,False,2026-02-22T01:11:27.000Z,,2.0,76042,2026-02-22,9999,winco,TYO,On-track machines,
1564785,OLK,ARRIVAL,False,,False,2026-02-22T01:11:54.000Z,2026-02-22T01:14:21.000Z,2.0,76042,2026-02-22,9999,winco,TYO,On-track machines,
1564786,OLK,DEPARTURE,False,,False,2026-02-22T01:11:54.000Z,2026-02-22T01:14:56.000Z,3.0,76042,2026-02-22,9999,winco,TYO,On-track machines,
1564787,KÄP,ARRIVAL,True,True,False,2026-02-22T01:13:21.000Z,2026-02-22T01:16:49.000Z,3.0,76042,2026-02-22,9999,winco,TYO,On-track machines,


In [6]:
# Final column order: train-level identifiers first, then the per-stop
# fields, then operator information and the cancellation flag.
columns = [
    'departureDate',
    'trainNumber',
    'trainCategory',
    'trainType',
    'commuterLineID',
    'stationShortCode',
    'type',
    'trainStopping',
    'commercialStop',
    'scheduledTime',
    'actualTime',
    'differenceInMinutes',
    'operatorUICCode',
    'operatorShortCode',
    'cancelled',
]

# Select every row and exactly these columns, in this order.
trains = trains.loc[:, columns]
trains.head()

Unnamed: 0,departureDate,trainNumber,trainCategory,trainType,commuterLineID,stationShortCode,type,trainStopping,commercialStop,scheduledTime,actualTime,differenceInMinutes,operatorUICCode,operatorShortCode,cancelled
0,2026-02-01,1,Long-distance,S,,HKI,DEPARTURE,True,True,2026-02-01T04:54:00.000Z,2026-02-01T04:56:35.000Z,3.0,10,vr,False
1,2026-02-01,1,Long-distance,S,,PSL,ARRIVAL,True,True,2026-02-01T04:59:00.000Z,2026-02-01T05:01:20.000Z,2.0,10,vr,False
2,2026-02-01,1,Long-distance,S,,PSL,DEPARTURE,True,True,2026-02-01T05:00:00.000Z,2026-02-01T05:02:53.000Z,3.0,10,vr,False
3,2026-02-01,1,Long-distance,S,,LOP,ARRIVAL,False,,2026-02-01T05:00:53.000Z,,3.0,10,vr,False
4,2026-02-01,1,Long-distance,S,,LOP,DEPARTURE,False,,2026-02-01T05:00:53.000Z,,3.0,10,vr,False


In [7]:
# Report the size of the final frame (rows, columns).
n_rows, n_cols = trains.shape
print(f'Dataframessa on {n_rows} riviä ja {n_cols} saraketta.')

Dataframessa on 1564788 riviä ja 15 saraketta.


## Junatiedot .parquet-muotoon

In [None]:
# Name the file by ISO year and ISO week taken from the same isocalendar()
# result. The ISO week number must not be paired with the calendar year:
# around New Year they disagree (Monday 2024-12-30 is ISO week 1 of 2025),
# which would label that week '2024_week_1' and overwrite the real one.
iso_calendar = first_day_of_previous_week.isocalendar()
week_number = iso_calendar.week
year = iso_calendar.year

# Build the path with pathlib so the separator is correct on every OS, and
# create the output directory if it is missing.
output_dir = Path('Data')
output_dir.mkdir(parents=True, exist_ok=True)
trains.to_parquet(output_dir / f'vr_trains_{year}_week_{week_number}.parquet', index=False)