## Build and Test API with dummy model

In [None]:
from electricity_price_predictor.data import get_all, get_weather
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import joblib

### Load data

In [None]:
df = get_all()

### Features and target defined

In [None]:
# Hold out only the last row as the test sample; every earlier row is used for training.
train, test = df.iloc[:-1], df.iloc[-1:]

In [None]:
# For each partition: features are every column except `price`, the target is `price`.
X_train, y_train = train.drop(columns='price'), train['price'].values
X_test, y_test = test.drop(columns='price'), test['price'].values

### Model

In [None]:
# Dummy model: min-max scale the features, then fit a linear regression on them.
pipe = Pipeline(steps=[('scaler', MinMaxScaler()), ('estimator', LinearRegression())])

In [None]:
pipe.fit(X_train, y_train)

In [None]:
pipe.predict(X_test)

#### Save model

Model saved

In [None]:
#model_name = '../test_model.joblib'
#joblib.dump(pipe, model_name)

Test set saved

In [None]:
#dummy_test = '../test_testset.joblib'
#joblib.dump(X_test, dummy_test)

## App api

In [None]:
# NOTE(review): `flask.escape` was deprecated in Flask 2.3 and is absent from later
# releases; there `escape` has to come from `markupsafe` — confirm against the
# Flask version this project pins.
from flask import Flask, escape, request
app = Flask(__name__)  # application object the @app.route handlers register on

In [None]:
@app.route('/')
def hello():
    """Greet the caller by name.

    The name comes from the ``name`` query parameter, e.g.
    http://127.0.0.1:5000/?name=value, and defaults to "World". It is
    escaped before being interpolated into the response text.
    """
    who = escape(request.args.get("name", "World"))
    return f'Hello, {who}!'

In [None]:
@app.route('/predict_day_ahead')
def day_head():
    """Predict with the stored dummy model on the stored test sample.

    Loads the test features and the fitted model from their joblib files,
    runs ``predict`` and returns a dict that Flask can serialise to JSON:

    * ``'test_values'``  -- the test sample as plain Python data
      (a list of row records when the sample is a pandas object)
    * ``'day-ahead prediction'`` -- the first predicted value as a float
    """
    import json  # local import so the handler does not rely on another cell

    # NOTE: joblib.load unpickles arbitrary Python objects. Only load
    # artifacts produced by this project, never files from untrusted sources.
    test = joblib.load('../test_testset.joblib')
    model = joblib.load('../test_model.joblib')

    # Convert the prediction to a built-in float so any numpy scalar type serialises.
    pred = float(model.predict(test)[0])

    # A DataFrame / ndarray placed directly in the response dict makes Flask's
    # JSON encoding fail, so convert the sample to JSON-native structures.
    if hasattr(test, 'to_json'):
        test_values = json.loads(test.to_json(orient='records', date_format='iso'))
    elif hasattr(test, 'tolist'):
        test_values = test.tolist()
    else:
        test_values = test

    return {'test_values': test_values, 'day-ahead prediction': pred}

In [None]:
X_test

In [None]:
import streamlit as st

In [None]:
import numpy as np
import pandas as pd

st.markdown("""# This is a header
## This is a sub header
This is text""")

# Toy frame for the slider demo. It has its own name so that it does not
# overwrite `df`, which this notebook also uses for the loaded dataset.
demo_df = pd.DataFrame({
          'first column': list(range(1, 11)),
          'second column': np.arange(10, 101, 10)
        })

# this slider allows the user to select a number of lines
# to display in the dataframe
# the selected value is returned by st.slider
line_count = st.slider('Select a line count', 1, 10, 3)

# and used in order to select the displayed lines
head_df = demo_df.head(line_count)

head_df

In [None]:
from electricity_price_predictor.data import get_weather

In [None]:
def get_weather(path='../raw_data/weather_2015_2020.csv',
                start='2015-01-01', end='2020-11-24'):
    """Load per-city weather rows and reduce them to one weighted row per timestamp.

    Each numeric weather column is averaged across the cities that share a
    timestamp, weighted by the city populations listed below. Hours inside
    ``[start, end]`` that are absent from the file are filled with the mean
    of the rows one hour before and one hour after.

    Parameters
    ----------
    path : str
        CSV file containing at least ``dt``, ``city_name``, the numeric
        weather columns and the auxiliary columns that are dropped here.
    start, end : str or datetime-like
        Inclusive bounds of the hourly range the result has to cover. The
        defaults are the range that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Sorted by timestamp, with columns ``temp``, ``feels_like``,
        ``humidity``, ``clouds_all`` and ``wind_speed``.

    Raises
    ------
    KeyError
        If a column to drop is missing, a city has no population entry, or a
        missing hour has no row directly before or after it.
    """
    # pd.Timedelta avoids depending on a `datetime.timedelta` import made in
    # some other cell, and is accepted as a frequency by every pandas version.
    one_hour = pd.Timedelta(hours=1)

    df = pd.read_csv(path)
    df['dt'] = pd.to_datetime(df.dt)

    # drop unnecessary columns
    to_drop = ['dt_iso','timezone','lat', 'lon','sea_level','grnd_level',
               'rain_1h','rain_3h', 'pressure', 'snow_1h', 'snow_3h',
               'temp_min','temp_max','weather_id', 'weather_description',
               'weather_icon', 'wind_deg']
    df = df.drop(to_drop, axis=1)

    # population of each city in the df
    pop = {'Aarhus': 349_983,
        'Odense': 204_895,
        'Aalborg': 217_075,
        'Esbjerg': 115_748,
        'Vejle': 111_743,
        'Randers': 96_559,
        'Viborg': 93_819,
        'Kolding': 89_412,
        'Silkeborg': 89_328,
        'Herning': 86_348,
        'Horsens': 83_598}

    df['population'] = [pop[city] for city in df.city_name]

    # numeric weather values as affects demand or supply
    numeric_cols = ['temp', 'feels_like', 'humidity',  'clouds_all','wind_speed']

    weather_df = pd.DataFrame()

    # group once by timestamp, then take the population-weighted mean per column
    by_time = df.groupby(df.dt)
    for col in numeric_cols:
        weather_df[col] = by_time.apply(
            lambda rows, c=col: np.average(rows[c], weights=rows.population))

    # hours of the requested range that have no row in the file
    expected_idx = pd.date_range(start=start, end=end, freq=one_hour)
    missing_idx = expected_idx.difference(weather_df.index)

    # impute missing hours with the average of the bounding rows; the
    # parentheses matter: without them only the later row would be halved
    for idx in missing_idx:
        before = weather_df.loc[idx - one_hour]
        after = weather_df.loc[idx + one_hour]
        weather_df.loc[idx] = (before + after) / 2

    weather_df = weather_df.sort_index()

    return weather_df

In [None]:
weather = get_weather()

In [None]:
weather

In [None]:
!pwd

In [None]:
from electricity_price_predictor.data import get_all

In [None]:
df = get_all()

In [None]:
df.head()

In [None]:
X = df.price.values

In [None]:
y = df.wind_prod.values

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.scatter(y, X)

In [None]:
df = get_weather()

In [None]:
df

In [1]:



# Write the current `df` to the updated-data CSV. This cell must stay at top
# level: an indented `return df` after this call is a SyntaxError outside a function.
df.to_csv('../raw_data/updated_data.csv')

## Testing API for new prices generation

In [2]:
#import requests
# import json
# import xmltodict
# import pandas as pd
import time
from datetime import date, timezone, datetime, timedelta
from electricity_price_predictor.data import *
import pandas as pd

In [3]:
get_shifted_price()

ValueError: Length mismatch: Expected axis has 4 elements, new values have 2 elements

In [57]:
df= get_updated_price()

In [60]:
df[df.index.duplicated()]

Unnamed: 0_level_0,price
time,Unnamed: 1_level_1
2015-10-25 02:00:00,20.77
2016-10-30 02:00:00,32.0
2017-10-29 02:00:00,-21.06
2018-10-28 02:00:00,41.59
2019-10-27 02:00:00,0.05
2020-10-25 02:00:00,0.09


In [37]:
# Append `new` only when it begins strictly after `old` ends. On any overlap
# the rows of `new` are not used and `df` is simply the sorted `old` frame.
old, new = old.sort_index(), new.sort_index()
df = pd.concat([old, new]) if old.index[-1] < new.index[0] else old

In [42]:
get_price()

Unnamed: 0_level_0,price
time,Unnamed: 1_level_1
2015-01-01 00:00:00,25.02
2015-01-01 01:00:00,18.29
2015-01-01 02:00:00,16.04
2015-01-01 03:00:00,14.60
2015-01-01 04:00:00,14.95
...,...
2020-12-03 19:00:00,24.59
2020-12-03 20:00:00,18.90
2020-12-03 21:00:00,18.33
2020-12-03 22:00:00,16.18


In [59]:
update[~update.index.duplicated()]

(51936, 1)

In [44]:
df = pd.read_csv('../raw_data/price/Day-ahead Prices_202001010000-202101010000.csv')

In [47]:
df

Unnamed: 0,MTU (CET),Day-ahead Price [EUR/MWh]
0,01.01.2020 00:00 - 01.01.2020 01:00,33.42
1,01.01.2020 01:00 - 01.01.2020 02:00,31.77
2,01.01.2020 02:00 - 01.01.2020 03:00,31.57
3,01.01.2020 03:00 - 01.01.2020 04:00,31.28
4,01.01.2020 04:00 - 01.01.2020 05:00,30.85
...,...,...
8780,31.12.2020 19:00 - 31.12.2020 20:00,-
8781,31.12.2020 20:00 - 31.12.2020 21:00,-
8782,31.12.2020 21:00 - 31.12.2020 22:00,-
8783,31.12.2020 22:00 - 31.12.2020 23:00,-


In [49]:
df[df['Day-ahead Price [EUR/MWh]'] != '-'].duplicated()

0       False
1       False
2       False
3       False
4       False
        ...  
8108    False
8109    False
8110    False
8111    False
8112    False
Length: 8113, dtype: bool

In [44]:
pd.concat([old, new]).index.drop_duplicates()

DatetimeIndex(['2015-01-01 00:00:00', '2015-01-01 01:00:00',
               '2015-01-01 02:00:00', '2015-01-01 03:00:00',
               '2015-01-01 04:00:00', '2015-01-01 05:00:00',
               '2015-01-01 06:00:00', '2015-01-01 07:00:00',
               '2015-01-01 08:00:00', '2015-01-01 09:00:00',
               ...
               '2020-12-03 14:00:00', '2020-12-03 15:00:00',
               '2020-12-03 16:00:00', '2020-12-03 17:00:00',
               '2020-12-03 18:00:00', '2020-12-03 19:00:00',
               '2020-12-03 20:00:00', '2020-12-03 21:00:00',
               '2020-12-03 22:00:00', '2020-12-03 23:00:00'],
              dtype='datetime64[ns]', name='time', length=51936, freq=None)

In [22]:
today = date.today()  # current local calendar date
start = datetime.combine(today, datetime.min.time())  # 00:00 of that date
stop = start + timedelta(days=1)
# Stack the output of get_price() with the output of get_new_price(start, stop).
# NOTE(review): presumably stored prices plus prices for the start-stop window;
# confirm against electricity_price_predictor.data.
df = pd.concat([get_price(), get_new_price(start, stop)])

In [68]:
def get_shifted_price_test():
    """Build the combined price frame with the daylight-saving segments shifted.

    Takes no arguments. The output of ``get_price()`` (cut off after
    2020-12-02 23:00) is concatenated with ``get_new_price`` for the window
    from today's midnight to the next midnight. The combined frame is cut
    into the fixed date segments below; segments flagged ``True`` are moved
    one row earlier (``shift(-1)``) and rows left empty by the shift are
    dropped. All segments are then re-joined and sorted by index.
    """
    window_start = datetime.combine(date.today(), datetime.min.time())
    window_stop = window_start + timedelta(days=1)

    history = get_price().loc[:'2020-12-02 23:00:00']
    latest = get_new_price(window_start, window_stop)
    latest.index.name = 'time'

    df = pd.concat([history, latest])
    df.index = pd.to_datetime(df.index)

    # (first label, last label, shift?) -- a last label of None means "to the end"
    segments = [
        ('2015-01-01 00:00:00', '2015-03-29 01:00:00', False),
        ('2015-03-29 02:00:00', '2015-10-25 02:00:00', True),
        ('2015-10-25 03:00:00', '2016-03-27 01:00:00', False),
        ('2016-03-27 02:00:00', '2016-10-30 02:00:00', True),
        ('2016-10-30 03:00:00', '2017-03-26 01:00:00', False),
        ('2017-03-26 02:00:00', '2017-10-29 02:00:00', True),
        ('2017-10-29 03:00:00', '2018-03-25 01:00:00', False),
        ('2018-03-25 02:00:00', '2018-10-28 02:00:00', True),
        ('2018-10-28 03:00:00', '2019-03-31 01:00:00', False),
        ('2019-03-31 02:00:00', '2019-10-27 02:00:00', True),
        ('2019-10-27 03:00:00', '2020-03-29 01:00:00', False),
        ('2020-03-29 02:00:00', '2020-10-25 02:00:00', True),
        ('2020-10-25 03:00:00', None, False),
    ]

    # Unshifted segments first, then the shifted ones, matching the original
    # concatenation order before the final sort.
    kept_as_is = [df.loc[first:last]
                  for first, last, shifted in segments if not shifted]
    moved_up = [df.loc[first:last].shift(periods=-1).dropna()
                for first, last, shifted in segments if shifted]

    return pd.concat(kept_as_is + moved_up).sort_index()