# Data exploration: Nord Pool prices, weather observations and TTF gas futures

In [1]:
# Imports
import pandas as pd
import os
import yfinance as yf

In [2]:
def enter_project_root(marker="data"):
    """Move the working directory one level up unless `marker` is already visible.

    The loading cells open files through paths relative to the working
    directory ("data/processed/...", "data/raw/..."). A bare os.chdir("..")
    climbs one level higher on every execution, so re-running the cell breaks
    those relative paths. The guard makes the cell safe to run repeatedly.

    Assumes the directory the kernel starts in does not itself contain a
    `marker` directory -- TODO confirm against the repository layout.

    Parameters
    ----------
    marker : str
        Name of a directory expected directly inside the project root.
    """
    if not os.path.isdir(marker):
        os.chdir("..")


# Navigate to the project root (safe to re-run).
enter_project_root()

In [3]:
# Load the input tables from local files (paths are relative to the working directory).

# Nord Pool CSV: decimal="," tells pandas that numbers use a comma as the decimal separator.
nordpool = pd.read_csv(os.path.join("data", "processed", "nordpool_estonia.csv"), decimal=",")

# Weather observations workbook: header=1 takes the column names from the second row.
# Built as a single method chain rather than with inplace=True calls: renaming a
# boolean-filtered frame in place can raise SettingWithCopyWarning on pandas
# versions without copy-on-write, and the chain reads top-down.
ilmateenistus = (
    pd.read_excel(os.path.join("data", "raw", "Tartu-Toravere_2004-2020.xlsx"), header=1)
    # Discard the measurement columns that the rest of the notebook does not use.
    .drop(columns=["Tunni miinimum õhutemperatuur °C", "Tunni maksimum õhutemperatuur °C",
                   "10 minuti keskmine tuule suund", "Tunni maksimum tuule kiirus m/s",
                   "Õhurõhk jaama kõrgusel hPa"])
    # Keep only the rows whose year ("Aasta") is 2013 or later; the original index is preserved.
    .loc[lambda frame: frame["Aasta"] >= 2013]
    # Shorten the remaining long column labels.
    .rename(columns={"Õhurõhk merepinna kõrgusel hPa": "Õhurõhk (hPa)",
                     "10 minuti keskmine tuule kiirus m/s": "Tuule kiirus (m/s)",
                     "Tunni sademete summa mm": "Sademed (mm)"})
)

In [4]:
# Dutch TTF Natural Gas Calendar futures ("TTF=F") fetched through yfinance.
# period="max" requests the full available history, so the row count depends on
# when the cell is executed.
ttf_ticker = yf.Ticker("TTF=F")
gas = ttf_ticker.history(period="max")[["Open", "Close"]]

# Size, the first ten rows, and summary statistics of the two price columns.
print(gas.shape)
print(gas.head(10))
gas.describe()

(1023, 2)
                 Open      Close
Date                            
2017-10-23  18.090000  18.090000
2017-10-24  17.959999  17.959999
2017-10-25  18.110001  18.110001
2017-10-26  18.070000  18.070000
2017-10-27  18.150000  18.150000
2017-10-30  18.110001  18.110001
2017-10-31  18.285000  18.285000
2017-11-01  18.549999  18.549999
2017-11-02  18.580000  18.580000
2017-11-03  18.309999  18.309999


Unnamed: 0,Open,Close
count,1023.0,1023.0
mean,21.302181,21.284703
std,17.115954,16.990452
min,3.7,3.51
25%,12.25,12.2675
50%,17.969999,18.004999
75%,22.6325,22.59
max,131.024994,116.019997


In [5]:
# Nord Pool: table size and summary statistics.
print(nordpool.shape)
print(nordpool.describe())

# Rows that need attention: a missing consumption or price value, or a
# consumption reading of exactly zero.
consumption_missing = nordpool["consumption"].isna()
price_missing = nordpool["elspot_price"].isna()
consumption_zero = nordpool["consumption"].eq(0)
nordpool[consumption_missing | price_missing | consumption_zero]

(51618, 4)
       elspot_price   consumption
count  51612.000000  51599.000000
mean      43.761003    934.528809
std       25.546023    201.366031
min       -1.730000      0.000000
25%       29.210000    783.000000
50%       38.915000    921.000000
75%       51.740000   1069.000000
max      283.390000   1582.000000


Unnamed: 0,Date,Hours,elspot_price,consumption
2066,27-03-2016,02 - 03,,
6199,15-09-2016,07 - 08,43.19,0.0
6200,15-09-2016,08 - 09,43.19,0.0
6201,15-09-2016,09 - 10,43.18,0.0
6202,15-09-2016,10 - 11,43.18,0.0
6203,15-09-2016,11 - 12,43.17,0.0
10805,26-03-2017,02 - 03,,
12220,24-05-2017,01 - 02,21.22,0.0
12221,24-05-2017,02 - 03,19.61,0.0
12621,09-06-2017,18 - 19,37.61,0.0


In [6]:
# Nord Pool: table size, summary statistics, and the rows with a missing
# consumption/price value or a zero consumption reading.
# NOTE(review): this cell repeats the preceding Nord Pool cell and produces the
# same output -- consider deleting one of the two.
print(nordpool.shape)
print(nordpool.describe())
any_value_missing = nordpool[["consumption", "elspot_price"]].isna().any(axis=1)
nordpool[any_value_missing | nordpool["consumption"].eq(0)]

(51618, 4)
       elspot_price   consumption
count  51612.000000  51599.000000
mean      43.761003    934.528809
std       25.546023    201.366031
min       -1.730000      0.000000
25%       29.210000    783.000000
50%       38.915000    921.000000
75%       51.740000   1069.000000
max      283.390000   1582.000000


Unnamed: 0,Date,Hours,elspot_price,consumption
2066,27-03-2016,02 - 03,,
6199,15-09-2016,07 - 08,43.19,0.0
6200,15-09-2016,08 - 09,43.19,0.0
6201,15-09-2016,09 - 10,43.18,0.0
6202,15-09-2016,10 - 11,43.18,0.0
6203,15-09-2016,11 - 12,43.17,0.0
10805,26-03-2017,02 - 03,,
12220,24-05-2017,01 - 02,21.22,0.0
12221,24-05-2017,02 - 03,19.61,0.0
12621,09-06-2017,18 - 19,37.61,0.0


In [7]:
# Ilmateenistus (weather observations): table size, the rows lacking a pressure
# or wind-speed value, and summary statistics.
print(ilmateenistus.shape)

# TODO: decide how to treat missing wind speed. An earlier draft filled it with 0,
# but it referred to the column by its pre-rename name; the column is now
# "Tuule kiirus (m/s)".
gap_columns = ["Õhurõhk (hPa)", "Tuule kiirus (m/s)"]
has_gap = ilmateenistus[gap_columns].isna().any(axis=1)
print(ilmateenistus.loc[has_gap, ["Aasta", "Kuu", "Päev", "Õhurõhk (hPa)", "Tuule kiirus (m/s)"]])

ilmateenistus.describe()

(70128, 9)
        Aasta  Kuu  Päev  Õhurõhk (hPa)  Tuule kiirus (m/s)
82243    2013    5    19            NaN                 NaN
82244    2013    5    19            NaN                 NaN
82245    2013    5    19            NaN                 NaN
82246    2013    5    19            NaN                 NaN
82247    2013    5    19            NaN                 NaN
82248    2013    5    20            NaN                 NaN
82249    2013    5    20            NaN                 NaN
82250    2013    5    20            NaN                 NaN
82251    2013    5    20            NaN                 NaN
82252    2013    5    20            NaN                 NaN
82253    2013    5    20            NaN                 NaN
82254    2013    5    20            NaN                 NaN
82255    2013    5    20            NaN                 NaN
82256    2013    5    20            NaN                 NaN
82257    2013    5    20            NaN                 NaN
82258    2013    5    20     

Unnamed: 0,Aasta,Kuu,Päev,Õhurõhk (hPa),Sademed (mm),Suhteline õhuniiskus %,Õhutemperatuur °C,Tuule kiirus (m/s)
count,70128.0,70128.0,70128.0,70112.0,70128.0,70128.0,70128.0,70106.0
mean,2016.501027,6.52293,15.729637,1013.852719,0.071767,78.806183,7.041021,2.386462
std,2.291453,3.448728,8.800155,10.704724,0.444434,17.972437,8.95449,1.329329
min,2013.0,1.0,1.0,966.9,0.0,12.0,-29.4,0.0
25%,2015.0,4.0,8.0,1007.3,0.0,68.0,0.7,1.3
50%,2016.5,7.0,16.0,1014.0,0.0,85.0,6.3,2.2
75%,2019.0,10.0,23.0,1020.6,0.0,93.0,14.0,3.2
max,2020.0,12.0,31.0,1051.3,30.0,100.0,31.6,9.7
