# Fetch Historical Data
Fetches historical data from Trafiklab's Kollektivtrafikens Datalabb (KoDa) API.

## Imports

In [1]:
import os
import sys
import time
from pathlib import Path

# Locate the project root: start from the current working directory and step
# out of `pipeline`, then `src`, if the notebook kernel was started inside them.
root_dir = Path().absolute()
for nested_dir in ('pipeline', 'src'):
    if root_dir.name == nested_dir:
        root_dir = root_dir.parent
root_dir = str(root_dir)

# Make the project root the working directory so relative paths resolve from it.
os.chdir(root_dir)
print(f"Root dir: {Path.cwd()}")

from datetime import date, timedelta
from src.data_utils.filter import *
from src.data_utils.ingest import *

Root dir: C:\Users\royli\Desktop\Courses\ID2223_Scalable_Machine_Learning_and_Deep_Learning\Project


## Fetch Data

### Decide How Many Days in the Past to Fetch

In [2]:
# Number of past days to fetch, counting backwards from (and including) yesterday.
number_of_days = 7
yesterday = date.today() - timedelta(days=1)

# Date strings in YYYY-MM-DD form, newest first.
dates = [
    (yesterday - timedelta(days=offset)).strftime("%Y-%m-%d")
    for offset in range(number_of_days)
]

print(dates)

['2025-12-23', '2025-12-22', '2025-12-21', '2025-12-20', '2025-12-19', '2025-12-18', '2025-12-17']


### Fetch Static Data from Trafiklab's KoDa API

In [7]:
# Fetch, extract and convert the static data for every requested date.
# A date whose output directory already exists is skipped, so the cell can be
# re-run without repeating finished work.
max_retries = 10
sleep_time = 5  # seconds to wait between failed attempts

for d in dates:
    date_dir = Path(f"data/static/{d}")
    if date_dir.exists():
        print(f"{d} exists, skipping")
        continue

    # Attempts are numbered 1..max_retries so the last-attempt check below can
    # actually trigger (a 0-based range never reaches max_retries).
    for attempt in range(1, max_retries + 1):
        try:
            zip_file = fetch_static(d, "data")
            zip_dir = extract_zip(zip_file)
            txt_to_csv(zip_dir)
            break
        except Exception as e:
            print(f"Attempt {attempt} failed: {e}")
            if attempt == max_retries:
                print("Reached max retries, skipping date")
                break  # give up on this date; no point sleeping before moving on

            print(f"Retrying in {sleep_time}s...")
            time.sleep(sleep_time)

print("Finishe

2025-12-23 exists, skipping
2025-12-22 exists, skipping
2025-12-21 exists, skipping
Saved GTFS static file to data\2025-12-20.zip.
Successfully extracted data\2025-12-20.zip to data\static\2025-12-20
Removed data\2025-12-20.zip
Saved GTFS static file to data\2025-12-19.zip.
Successfully extracted data\2025-12-19.zip to data\static\2025-12-19
Removed data\2025-12-19.zip
Saved GTFS static file to data\2025-12-18.zip.
Successfully extracted data\2025-12-18.zip to data\static\2025-12-18
Removed data\2025-12-18.zip
Saved GTFS static file to data\2025-12-17.zip.
Successfully extracted data\2025-12-17.zip to data\static\2025-12-17
Removed data\2025-12-17.zip
