In [None]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (e.g. `forecaster`) are reloaded automatically when their source changes,
# without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import plotly.graph_objects as go

from datetime import datetime
from zoneinfo import ZoneInfo

from forecaster.data import fingrid

# IDs of the datasets to download, kept as strings.
# One ID per line so that entries can be added or removed with a one-line diff.
dataset_ids = [
    '246',
    '247',
    '165',
    '242',
]

In [None]:
# Fetch the short descriptions of all datasets and print them.
# NOTE(review): presumably used to look up the IDs listed in `dataset_ids` — confirm
dataset_shorts = fingrid.fetch_dataset_shorts()
fingrid.print_dataset_shorts(dataset_shorts)

## Define the time interval for which to fetch data

In [None]:
# The interval boundaries are expressed in Helsinki local time
timezone = ZoneInfo('Europe/Helsinki')


def _as_rfc3339_utc(moment):
    """Return `moment` converted to UTC as an RFC 3339 string with a 'Z' suffix."""
    return moment.astimezone(ZoneInfo('UTC')).isoformat().replace("+00:00", "Z")


# Interval boundaries: local midnight on 1 Jan 2016 and on 1 Jan 2024
dt_start = datetime(2016, 1, 1, tzinfo=timezone)
dt_end = datetime(2024, 1, 1, tzinfo=timezone)

# UTC / RFC 3339 forms of the boundaries, printed one per line for a quick check
start_time_utc_3339 = _as_rfc3339_utc(dt_start)
end_time_utc_3339 = _as_rfc3339_utc(dt_end)
print(start_time_utc_3339, end_time_utc_3339, sep="\n")

## Download data for selected datasets

In [None]:
# Download each selected dataset, convert it to an hourly series indexed by
# Helsinki local time, and save it as ../data/<dataset_id>.csv
for dataset_id in dataset_ids:

    print(f"Fetching data for dataset {dataset_id}")
    records = fingrid.fetch_dataset_data(dataset_id, start_time_utc_3339, end_time_utc_3339)

    # Fail with a clear message instead of an opaque KeyError from the column
    # operations below when nothing came back for the requested interval
    raw = pd.DataFrame(records)
    if raw.empty:
        raise ValueError(f"No data returned for dataset {dataset_id}")

    data = (
        raw
        # Only the interval start is kept as the timestamp of each observation
        .drop(columns='endTime')
        # Parse to a tz-aware UTC series (utc=True treats any naive strings as
        # UTC), then convert to Helsinki time in one vectorized step instead
        # of a per-row Python call
        .assign(startTime=lambda df: pd.to_datetime(df['startTime'], utc=True).dt.tz_convert(timezone))
        .set_index('startTime')
        .sort_index()
        # Put the series on an hourly grid using the nearest observation for
        # each hour. No `limit` is given, so hours without an observation are
        # also filled from their nearest neighbour and no separate
        # interpolation pass is performed.
        .resample('h')
        .nearest()
    )

    data.to_csv(f'../data/{dataset_id}.csv')

## Visualize the external variables

In [None]:
# Plot the selected datasets with the project's interactive plotting helper.
# NOTE(review): presumably this reads the ../data/<dataset_id>.csv files written
# by the download step — confirm against `plot_external_data`.
# TODO: consider moving this import to the notebook's import cell.
from forecaster.plotting.interactive import plot_external_data
plot_external_data(dataset_ids)