# Functions

In [1]:
class ElectricityHourPrice:
    """Plain record for one row of hourly price statistics.

    Every constructor argument is stored unchanged under an attribute of the
    same name; nothing is converted or validated here.

    The attributes are created in the same order as the parameters. That order
    matters: get_prices() turns instances into DataFrame rows with vars() and
    then selects columns by position (first column = date, last = hour).
    """

    def __init__(self, date, price_zone_code,
                 consumer_volume, consumer_price,
                 consumer_rd_volume, consumer_spot_volume, consumer_provide_rd,
                 consumer_max_price, consumer_min_price,
                 supplier_volume, supplier_price,
                 supplier_rd_volume, supplier_spot_volume, supplier_provide_rd,
                 supplier_max_price, supplier_min_price,
                 hour):
        # Row identity (the hour is stored last, see below).
        self.date, self.price_zone_code = date, price_zone_code

        # Consumer-side figures; targets are bound left to right, so the
        # attribute order equals the parameter order.
        (self.consumer_volume,
         self.consumer_price,
         self.consumer_rd_volume,
         self.consumer_spot_volume,
         self.consumer_provide_rd,
         self.consumer_max_price,
         self.consumer_min_price) = (consumer_volume,
                                     consumer_price,
                                     consumer_rd_volume,
                                     consumer_spot_volume,
                                     consumer_provide_rd,
                                     consumer_max_price,
                                     consumer_min_price)

        # Supplier-side figures, mirroring the consumer block.
        (self.supplier_volume,
         self.supplier_price,
         self.supplier_rd_volume,
         self.supplier_spot_volume,
         self.supplier_provide_rd,
         self.supplier_max_price,
         self.supplier_min_price) = (supplier_volume,
                                     supplier_price,
                                     supplier_rd_volume,
                                     supplier_spot_volume,
                                     supplier_provide_rd,
                                     supplier_max_price,
                                     supplier_min_price)

        # Kept as the final attribute so it ends up as the last column.
        self.hour = hour

In [2]:
import requests
import xmltodict
from operator import itemgetter

def get_prices_by_date(date, price_zone = '2', timeout = 30):
    """Download one day of statistics from atsenergo.ru and return its rows.

    Parameters
    ----------
    date : datetime.date or datetime.datetime
        Day to request; only its year, month and day are used.
    price_zone : str, default '2'
        Value sent as the `zone` query parameter.
    timeout : float or tuple, default 30
        Passed to requests.get() so a stalled connection cannot block the
        notebook forever.

    Returns
    -------
    list
        One entry per `row` element of the response, each entry being the
        `col` value(s) of that row as parsed by xmltodict.

    Raises
    ------
    requests.RequestException
        On connection problems, timeouts, or a non-success HTTP status.
    KeyError, TypeError
        If the parsed document has no `view` / `row` / `col` elements.
    """
    f_date = date.strftime('%Y%m%d')
    url = f'https://www.atsenergo.ru/market/stats.xml?period=0&date1={f_date}&date2={f_date}&zone={price_zone}&type=graph'
    # verify = False --> self-signed cert.
    # NOTE(review): this disables TLS certificate validation entirely; prefer
    # passing the server certificate via `verify = <path>` if it is available.
    response = requests.get(url, verify = False, timeout = timeout)
    # Fail here on an HTTP error instead of trying to parse an error page as XML.
    response.raise_for_status()
    result = xmltodict.parse(response.content)
    rows = result['view']['row']
    # xmltodict yields a single mapping (not a list) when an element occurs
    # only once, so normalise to a list before extracting the columns.
    if isinstance(rows, dict):
        rows = [rows]
    return [row['col'] for row in rows]

In [3]:
import pandas as pd

def verify_index(df):
    """Check that a frame's index is a gap-free, regularly spaced date range.

    The index is compared element-wise against
    pd.date_range(index.min(), index.max(), freq=index.freq).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index should be verified.

    Returns
    -------
    bool
        True if the index equals the expected range; False if it differs or
        if the check itself could not be carried out (a message is printed in
        the latter case).
    """
    try:
        index = df.index
        expected = pd.date_range(start = index.min(), end = index.max(), freq = index.freq)
        # Different lengths mean the index has gaps or duplicates; report that
        # as a failed check rather than letting the comparison raise.
        if len(expected) != len(index):
            return False
        return bool((index == expected).all())
    except Exception as exc:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("Error: can't verify temporary index")
        print(f"Reason: {exc}")
        return False

In [4]:
import warnings
# Silence every warning for the rest of the kernel session.
# NOTE(review): presumably added to hide the InsecureRequestWarning triggered by
# `verify = False` in get_prices_by_date; it also hides all other warnings
# (e.g. library deprecations) — confirm and narrow the filter if possible.
warnings.filterwarnings("ignore")
import datetime

def _fetch_price_records(start_date, end_date, price_zone):
    """Download every day in [start_date, end_date] and return the rows as ElectricityHourPrice objects."""
    records = []
    cur_date = start_date
    while cur_date <= end_date:
        # Progress message on days of the month divisible by 7 only.
        if (cur_date.day % 7 == 0):
            print(f"Getting prices for {cur_date.strftime('%Y-%m-%d')}")
        try:
            price_rows = get_prices_by_date(cur_date, price_zone = price_zone)
            for price_row in price_rows:
                records.append(ElectricityHourPrice(*price_row))
        except Exception as e:
            # Best effort: a failed day is reported and skipped; the gap shows
            # up later as NaN rows after the hourly re-indexing.
            print(f"Can't get the price data for {cur_date.strftime('%Y-%m-%d')}")
            print(f"Reason: {e}")
        cur_date += datetime.timedelta(days = 1)
    return records


def _format_price_frame(electricity_hour_prices):
    """Turn ElectricityHourPrice records into a float-typed frame on a regular hourly UTC index."""
    df = pd.DataFrame([vars(price_row) for price_row in electricity_hour_prices])
    # Column order follows ElectricityHourPrice: first = date, last = hour,
    # everything in between is numeric.
    value_columns = df.columns[1:-1]
    df[value_columns] = df[value_columns].astype(float)
    df['hour'] = df['hour'].astype(int)
    df['price_zone_code'] = df['price_zone_code'].astype(int)
    # Day (dd.mm.YYYY) plus hour offset, built in one vectorised step. Unlike
    # datetime.replace(hour=...), an hour outside 0-23 rolls into the adjacent
    # day instead of raising.
    # NOTE(review): the timestamps are labelled as UTC without any conversion —
    # confirm that this matches the time zone used by the data source.
    timestamps = (pd.to_datetime(df['date'], format = '%d.%m.%Y', utc = True)
                  + pd.to_timedelta(df['hour'], unit = 'h'))
    df = df.drop(columns = ['date', 'hour'])
    df['date'] = timestamps
    df = df.set_index('date').sort_index()
    # Offset object instead of the '1H' alias, which newer pandas deprecates.
    return df.asfreq(pd.offsets.Hour(1))


def get_prices(start_date, end_date, price_zone):
    """Fetch hourly prices for a date range and return them as a DataFrame.

    Parameters
    ----------
    start_date, end_date : datetime.datetime
        First and last day to download (both inclusive).
    price_zone : str
        Price zone forwarded to get_prices_by_date().

    Returns
    -------
    pandas.DataFrame
        One row per hour between the first and last retrieved timestamp,
        indexed by a UTC `date` index with hourly frequency. Hours for which
        nothing was retrieved are present as NaN rows. The `date` and `hour`
        fields of the records are folded into the index.

    Raises
    ------
    ValueError
        If not a single row could be retrieved for the whole range.
    """
    print(f"Starting getting prices for {start_date.strftime('%Y-%m-%d')}")
    electricity_hour_prices = _fetch_price_records(start_date, end_date, price_zone)
    print(f"Finished getting prices for {end_date.strftime('%Y-%m-%d')}")
    if not electricity_hour_prices:
        # Without this guard the formatting step fails with an opaque
        # KeyError: 'hour' on the empty frame.
        raise ValueError(
            f"No price data was retrieved for price zone {price_zone} between "
            f"{start_date.strftime('%Y-%m-%d')} and {end_date.strftime('%Y-%m-%d')}"
        )
    print('Formatting the dataset...')
    df_form = _format_price_frame(electricity_hour_prices)
    verification_result = verify_index(df_form)
    if (verification_result):
        print('Index is verified.')
    else:
        print('(Error) Problem with index occured!')
    print('Dataset is formatted.')
    return df_form

In [5]:
def add_time_features(df_orig):
    """Return a copy of the frame with calendar columns derived from its index.

    Added (or overwritten) columns, in this order: year, quarter, month,
    week_of_year, day_of_year, day_of_week, day, hour. The input frame is
    left untouched. The index must expose the datetime accessors used below.
    """
    stamps = df_orig.index
    # Wrapped in pd.Index so the ISO week values are assigned by position.
    iso_week = pd.Index(stamps.isocalendar().week)
    enriched = df_orig.assign(
        year = stamps.year,
        quarter = stamps.quarter,
        month = stamps.month,
        week_of_year = iso_week,
        day_of_year = stamps.dayofyear,
        day_of_week = stamps.dayofweek,
        day = stamps.day,
        hour = stamps.hour,
    )
    # isocalendar() yields an unsigned dtype; store the week as a plain int.
    enriched['week_of_year'] = enriched['week_of_year'].astype(int)
    return enriched

# Actual code

In [8]:
import datetime

# Requested period, given as (year, month, day), and the price zone to download
start_date = datetime.datetime(2021, 6, 24)
end_date = datetime.datetime(2024, 6, 24)
price_zone = '2'

# Column kept as the target; valid names are the attributes of ElectricityHourPrice
target_column = 'supplier_price'

# Calendar columns produced by add_time_features, in the order they are kept
time_feature_columns = [
    'year',
    'quarter',
    'month',
    'week_of_year',
    'day_of_year',
    'day_of_week',
    'day',
    'hour',
]

# Put the bounds in chronological order if they were entered reversed
if (start_date > end_date):
    start_date, end_date = end_date, start_date

print(f'Start date: {start_date.strftime("%Y-%m-%d")}')
print(f'End date: {end_date.strftime("%Y-%m-%d")}')
print(f'Time period: {str(end_date - start_date)}')
print()

# Download, add the calendar features, then keep only the features and the target
df = add_time_features(get_prices(start_date, end_date, price_zone))
df = df[time_feature_columns + [target_column]]

df

Start date: 2021-06-24
End date: 2024-06-24
Time period: 1096 days, 0:00:00

Starting getting prices for 2021-06-24
Getting prices for 2021-06-28
Getting prices for 2021-07-07
Getting prices for 2021-07-14
Getting prices for 2021-07-21
Getting prices for 2021-07-28
Getting prices for 2021-08-07
Getting prices for 2021-08-14
Getting prices for 2021-08-21
Getting prices for 2021-08-28
Getting prices for 2021-09-07
Getting prices for 2021-09-14
Getting prices for 2021-09-21
Getting prices for 2021-09-28
Getting prices for 2021-10-07
Getting prices for 2021-10-14
Getting prices for 2021-10-21
Getting prices for 2021-10-28
Getting prices for 2021-11-07
Getting prices for 2021-11-14
Getting prices for 2021-11-21
Getting prices for 2021-11-28
Getting prices for 2021-12-07
Getting prices for 2021-12-14
Getting prices for 2021-12-21
Getting prices for 2021-12-28
Getting prices for 2022-01-07
Getting prices for 2022-01-14
Getting prices for 2022-01-21
Getting prices for 2022-01-28
Getting prices

Unnamed: 0_level_0,year,quarter,month,week_of_year,day_of_year,day_of_week,day,hour,supplier_price
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-06-24 00:00:00+00:00,2021,2,6,25,175,3,24,0,669.88
2021-06-24 01:00:00+00:00,2021,2,6,25,175,3,24,1,671.66
2021-06-24 02:00:00+00:00,2021,2,6,25,175,3,24,2,674.60
2021-06-24 03:00:00+00:00,2021,2,6,25,175,3,24,3,661.88
2021-06-24 04:00:00+00:00,2021,2,6,25,175,3,24,4,708.72
...,...,...,...,...,...,...,...,...,...
2024-06-24 19:00:00+00:00,2024,2,6,26,176,0,24,19,1193.24
2024-06-24 20:00:00+00:00,2024,2,6,26,176,0,24,20,1062.33
2024-06-24 21:00:00+00:00,2024,2,6,26,176,0,24,21,950.58
2024-06-24 22:00:00+00:00,2024,2,6,26,176,0,24,22,917.14


In [9]:
# The output file name encodes the price zone, the target column and the date span.
start_form, end_form = (bound.strftime("%Y-%m-%d") for bound in (start_date, end_date))
csv_name = f'electricity_price_hourly_{price_zone}_zone_{target_column}_{start_form}__{end_form}.csv'
# index = True keeps the timestamp index as the first CSV column.
df.to_csv(csv_name, index = True)