In [4]:
%matplotlib inline

import os
from pathlib import Path

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

# Both data folders hang off "<cwd>/../data"; the notebook is expected to be
# launched from a directory that sits next to "data".
_DATA_ROOT = Path(os.getcwd()) / os.pardir / "data"

DATA_RAW = _DATA_ROOT / "raw" / "optimization"      # untouched input files
DATA_FINAL = _DATA_ROOT / "final" / "optimization"  # processed output files

# Process data

In [5]:
# Raw forecast columns are suffixed 15, 30, ..., 1440 (96 values; presumably
# minutes ahead -- confirm against the data dictionary). The cleaned columns
# are numbered 00..95 instead.
old_steps = [15 * k for k in range(1, 97)]
new_steps = range(96)

# PV columns first, then load columns, so old/new names pair up positionally.
old_columns = [f"{prefix}_Forecast_{s}" for prefix in ("PV", "Load") for s in old_steps]
new_columns = [f"{prefix}_{s:02d}" for prefix in ("pv", "load") for s in new_steps]

# Raw column name -> cleaned column name.
rename_map = {
    **dict(zip(old_columns, new_columns)),
    'Timestamp': 'timestamp',
    'SiteId': 'site_id',
    'Load_Values': 'actual_consumption',
    'PV_Values': 'actual_pv',
}

{'Load_Forecast_1005': 'load_66',
 'Load_Forecast_1020': 'load_67',
 'Load_Forecast_1035': 'load_68',
 'Load_Forecast_105': 'load_06',
 'Load_Forecast_1050': 'load_69',
 'Load_Forecast_1065': 'load_70',
 'Load_Forecast_1080': 'load_71',
 'Load_Forecast_1095': 'load_72',
 'Load_Forecast_1110': 'load_73',
 'Load_Forecast_1125': 'load_74',
 'Load_Forecast_1140': 'load_75',
 'Load_Forecast_1155': 'load_76',
 'Load_Forecast_1170': 'load_77',
 'Load_Forecast_1185': 'load_78',
 'Load_Forecast_120': 'load_07',
 'Load_Forecast_1200': 'load_79',
 'Load_Forecast_1215': 'load_80',
 'Load_Forecast_1230': 'load_81',
 'Load_Forecast_1245': 'load_82',
 'Load_Forecast_1260': 'load_83',
 'Load_Forecast_1275': 'load_84',
 'Load_Forecast_1290': 'load_85',
 'Load_Forecast_1305': 'load_86',
 'Load_Forecast_1320': 'load_87',
 'Load_Forecast_1335': 'load_88',
 'Load_Forecast_135': 'load_08',
 'Load_Forecast_1350': 'load_89',
 'Load_Forecast_1365': 'load_90',
 'Load_Forecast_1380': 'load_91',
 'Load_Forecast_1

In [27]:
def get_price_data(df, horizon=96):
    """Build rolling windows of buy and sell prices, one row per input row.

    For every row position ``i`` in ``0 .. len(df) - horizon - 1`` the output
    row holds ``Purchase_Price[i : i + horizon]`` followed by
    ``Sale_Price[i : i + horizon]`` (positional slices, so the window starts
    at row ``i`` itself). The last ``horizon`` rows of ``df`` get no output
    row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Purchase_Price`` and ``Sale_Price``.
    horizon : int, default 96
        Number of consecutive prices per window.

    Returns
    -------
    pandas.DataFrame
        ``max(len(df) - horizon, 0)`` rows and ``2 * horizon`` columns named
        ``price_buy_00 ..`` and ``price_sell_00 ..``, indexed by the index
        labels of the corresponding rows of ``df``.
    """
    # Convert once up front: slicing plain arrays inside the loop avoids a
    # pandas slice plus an explicit copy for every single row.
    buy_prices = df["Purchase_Price"].to_numpy()
    sell_prices = df["Sale_Price"].to_numpy()

    # Explicit window count (instead of a negative slice) also behaves
    # correctly when df has fewer rows than `horizon`.
    n_windows = max(len(df) - horizon, 0)

    arrays = [
        np.concatenate((buy_prices[i:i + horizon], sell_prices[i:i + horizon]))
        for i in range(n_windows)
    ]

    cols = [f"price_buy_{s:02d}" for s in range(horizon)] + \
           [f"price_sell_{s:02d}" for s in range(horizon)]

    return pd.DataFrame(data=arrays,
                        columns=cols,
                        index=df.index.values[:n_windows])


def process_raw(f, out_path=None):
    """Split one raw site CSV into train / submit / eval tables.

    The file is read with ``;`` as separator and split on its ``given``
    column: ``'given1'`` -> ``train``, ``'given2'`` -> ``submit``,
    ``'secret'`` -> ``eval``. For each split the ``given`` column is dropped,
    columns are renamed via ``rename_map``, the rolling price windows from
    ``get_price_data`` are joined on, the last 96 rows (which have no price
    window) are dropped, and the columns are reordered with ``timestamp`` as
    the index.

    Parameters
    ----------
    f : str or pathlib.Path
        Path of the raw CSV file; its file name is reused for the outputs.
    out_path : str or pathlib.Path, optional
        Root output directory. Each split is written to
        ``out_path/<split>/<file name>``; missing directories are created.
        When ``None`` nothing is written to disk.

    Returns
    -------
    dict
        Maps ``'train'``, ``'submit'`` and ``'eval'`` to the processed
        DataFrame of that split.
    """
    f = Path(f)
    df = pd.read_csv(f, parse_dates=['Timestamp'], sep=';')

    # output sub-folder -> value of the raw "given" column selecting its rows
    splits = [('train', 'given1'), ('submit', 'given2'), ('eval', 'secret')]

    # final column layout: identifying columns first, then the 96-step blocks
    id_columns = ['timestamp', 'site_id', 'actual_consumption', 'actual_pv']
    prefixes = ['load', 'pv', 'price_buy', 'price_sell']
    ordered_columns = [f"{pre}_{d:02d}" for pre in prefixes for d in range(96)]

    results = {}
    for folder, given in splits:
        # drop "given" and rename columns; each step returns a new frame, so
        # the loaded df is never mutated
        sub_df = (df[df.given == given]
                  .drop('given', axis=1)
                  .rename(columns=rename_map))

        # rotate prices: one row of upcoming buy/sell prices per timestamp
        price_data = get_price_data(sub_df)

        # the last 96 rows have no price window, so they are cut before joining
        final = sub_df.iloc[:-96, :].join(price_data)

        # order columns sensibly
        final = final[id_columns + ordered_columns].set_index('timestamp')

        if out_path is not None:
            target_dir = Path(out_path) / folder
            # create the split folder on first use instead of failing in to_csv
            target_dir.mkdir(parents=True, exist_ok=True)
            final.to_csv(target_dir / f.name)

        results[folder] = final

    return results
    
    
# Process every raw CSV in the "Secret" folder whose name starts with a digit.
raw_site_files = (DATA_RAW / 'Secret').glob('[0-9]*.csv')

for f in raw_site_files:
    process_raw(f, DATA_FINAL)

# Metadata

In [40]:
# The raw site metadata is ';'-separated and uses ',' as the decimal mark;
# it is re-saved with pandas' default CSV settings.
metadata_source = DATA_RAW / 'Metadata' / 'site-meta.csv'
metadata = pd.read_csv(metadata_source, sep=";", decimal=',', index_col=0)

metadata_target = DATA_FINAL / 'public' / 'metadata.csv'
metadata.to_csv(metadata_target)