# Edellisen viikon junatiedot

In [1]:
import datetime as dt
from pathlib import Path

import pandas as pd
import requests
from requests.exceptions import HTTPError

# Boundaries of the previous calendar week. date.weekday() is 0 on Monday,
# so subtracting it from today yields the Monday of the current week.
today = dt.date.today()
first_day_of_current_week = today - dt.timedelta(days=today.weekday())
first_day_of_previous_week = first_day_of_current_week - dt.timedelta(weeks=1)
last_day_of_previous_week = first_day_of_current_week - dt.timedelta(days=1)


base_api_url = 'https://rata.digitraffic.fi/api/v1/trains/'
# (connect, read) timeout in seconds; without one a stalled connection would
# block this cell forever.
REQUEST_TIMEOUT = (10, 60)
# Accumulates the decoded JSON records of every requested day.
json_list = []

try:
    # One request per day of the previous week; the date is appended to the URL.
    for date in pd.date_range(first_day_of_previous_week, last_day_of_previous_week):
        response = requests.get(base_api_url + date.strftime('%Y-%m-%d'), timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        trains_json = response.json()
        if isinstance(trains_json, list):
            json_list.extend(trains_json)
        else:
            # A non-list payload is kept as a single record.
            json_list.append(trains_json)
    # Outer double quotes keep this f-string valid on Python versions before 3.12,
    # which do not allow reusing the enclosing quote inside a replacement field.
    print(f"Data noudettu onnistuneesti {first_day_of_previous_week.strftime('%d.%m.%Y')} - {last_day_of_previous_week.strftime('%d.%m.%Y')} väliseltä ajalta.")
except HTTPError as http_err:
    print(f'Datan noutamisessa tapahtui virhe: {http_err}')
    # Re-raise so the following cells do not continue with a partially
    # filled json_list and store it as if it were a complete week.
    raise
except Exception as err:
    print(f'Tapahtui odottamaton virhe: {err}')
    raise

Data noudettu onnistuneesti 19.01.2026 - 25.01.2026 väliseltä ajalta.


In [None]:
# Preview only the first record; displaying the whole list would embed the
# entire week of raw JSON in the notebook output.
json_list[:1]

In [3]:
# Columns of the flattened timetable rows that are not used later on.
unused_columns = [
    'stationUICCode', 'countryCode', 'stopSector', 'commercialTrack',
    'trainReady.source', 'trainReady.accepted', 'trainReady.timestamp',
    'trainStopping', 'commercialStop', 'estimateSource', 'liveEstimateTime',
    'unknownTrack', 'unknownDelay', 'causes',
]

# Flatten the nested JSON: one row per entry of each record's 'timeTableRows',
# with the listed train-level fields repeated on every row.
# errors='ignore' keeps the cell working when one of the unused columns is
# absent from the fetched data instead of raising a KeyError.
trains = pd.json_normalize(
    json_list,
    record_path=['timeTableRows'],
    meta=[
        'trainNumber',
        'departureDate',
        'operatorUICCode',
        'operatorShortCode',
        'trainType',
        'trainCategory',
        'commuterLineID',
    ],
).drop(columns=unused_columns, errors='ignore')

trains.head()

Unnamed: 0,stationShortCode,type,cancelled,scheduledTime,actualTime,differenceInMinutes,trainNumber,departureDate,operatorUICCode,operatorShortCode,trainType,trainCategory,commuterLineID
0,HKI,DEPARTURE,False,2026-01-19T04:54:00.000Z,2026-01-19T04:54:31.000Z,1.0,1,2026-01-19,10,vr,IC,Long-distance,
1,PSL,ARRIVAL,False,2026-01-19T04:59:00.000Z,2026-01-19T04:59:11.000Z,0.0,1,2026-01-19,10,vr,IC,Long-distance,
2,PSL,DEPARTURE,False,2026-01-19T05:00:00.000Z,2026-01-19T05:00:46.000Z,1.0,1,2026-01-19,10,vr,IC,Long-distance,
3,LOP,ARRIVAL,False,2026-01-19T05:00:53.000Z,,1.0,1,2026-01-19,10,vr,IC,Long-distance,
4,LOP,DEPARTURE,False,2026-01-19T05:00:53.000Z,,1.0,1,2026-01-19,10,vr,IC,Long-distance,


In [4]:
# Show the last five rows as a quick check of the end of the frame.
trains.tail(n=5)

Unnamed: 0,stationShortCode,type,cancelled,scheduledTime,actualTime,differenceInMinutes,trainNumber,departureDate,operatorUICCode,operatorShortCode,trainType,trainCategory,commuterLineID
498173,LOP,ARRIVAL,False,2026-01-25T16:42:34.000Z,,4.0,89700,2026-01-25,10,vr,HL,Commuter,R
498174,LOP,DEPARTURE,False,2026-01-25T16:42:34.000Z,,4.0,89700,2026-01-25,10,vr,HL,Commuter,R
498175,PSL,ARRIVAL,False,2026-01-25T16:43:30.000Z,2026-01-25T16:47:47.000Z,4.0,89700,2026-01-25,10,vr,HL,Commuter,R
498176,PSL,DEPARTURE,False,2026-01-25T16:44:00.000Z,2026-01-25T16:48:13.000Z,4.0,89700,2026-01-25,10,vr,HL,Commuter,R
498177,HKI,ARRIVAL,False,2026-01-25T16:49:00.000Z,2026-01-25T16:52:31.000Z,4.0,89700,2026-01-25,10,vr,HL,Commuter,R


In [5]:
# Desired column order: train identification first, then the stop and its
# timing, and finally operator and cancellation information.
columns = [
    'departureDate',
    'trainNumber',
    'trainCategory',
    'trainType',
    'commuterLineID',
    'stationShortCode',
    'type',
    'scheduledTime',
    'actualTime',
    'differenceInMinutes',
    'operatorUICCode',
    'operatorShortCode',
    'cancelled',
]

# Label-based selection returns the listed columns in the given order.
trains = trains.loc[:, columns]
trains.head()

Unnamed: 0,departureDate,trainNumber,trainCategory,trainType,commuterLineID,stationShortCode,type,scheduledTime,actualTime,differenceInMinutes,operatorUICCode,operatorShortCode,cancelled
0,2026-01-19,1,Long-distance,IC,,HKI,DEPARTURE,2026-01-19T04:54:00.000Z,2026-01-19T04:54:31.000Z,1.0,10,vr,False
1,2026-01-19,1,Long-distance,IC,,PSL,ARRIVAL,2026-01-19T04:59:00.000Z,2026-01-19T04:59:11.000Z,0.0,10,vr,False
2,2026-01-19,1,Long-distance,IC,,PSL,DEPARTURE,2026-01-19T05:00:00.000Z,2026-01-19T05:00:46.000Z,1.0,10,vr,False
3,2026-01-19,1,Long-distance,IC,,LOP,ARRIVAL,2026-01-19T05:00:53.000Z,,1.0,10,vr,False
4,2026-01-19,1,Long-distance,IC,,LOP,DEPARTURE,2026-01-19T05:00:53.000Z,,1.0,10,vr,False


In [6]:
# Report the size of the frame: number of rows and number of columns.
print(f'Dataframessa on {len(trains)} riviä ja {len(trains.columns)} saraketta.')

Dataframessa on 498178 riviä ja 13 saraketta.


## Junatiedot .parquet-muotoon

In [7]:
# Take both the week number and the year from the ISO calendar. Around New Year
# the ISO year can differ from the calendar year of the week's Monday (for
# example Monday 2024-12-30 belongs to ISO week 1 of 2025), and mixing the two
# would give the file the name of a different week.
iso_calendar = first_day_of_previous_week.isocalendar()
week_number = iso_calendar.week
year = iso_calendar.year

# Build the path with pathlib so the separator is correct on every platform,
# and make sure the target directory exists before writing.
output_dir = Path('Data')
output_dir.mkdir(parents=True, exist_ok=True)
trains.to_parquet(output_dir / f'vr_trains_{year}_week_{week_number}.parquet', index=False)