In [1]:
import csv
import numpy as np
from datetime import datetime

In [2]:
import pandas

In [5]:
import matplotlib
%matplotlib inline  

ValueError: unknown locale: UTF-8

# Constants

In [16]:
# Column labels used as string keys.
# NOTE(review): none of these constants is referenced in the visible cells;
# presumably they mirror the header names of the ARSO CSV export - confirm.
STATION_ID_COL_NAME = 'station id'
STATION_NAME_COL_NAME = 'station name'
# Label of the temperature column (unit given in the label itself: degrees C).
PARAM_T = 'T [°C]'
# Label of the precipitation column ("količina padavin", unit in the label: mm).
PARAM_RAIN = 'količina padavin [mm]'

# Initial Configuration

# Utils

In [112]:
def ARSO_datetime_to_epoch(datetime_string):
    """Return the epoch time (seconds, as a float) of an ARSO timestamp.

    Parameters
    ----------
    datetime_string : str
        Timestamp formatted as ``YYYY-MM-DD HH:MM``.

    Returns
    -------
    float
        Seconds elapsed between 1970-01-01 00:00 and the parsed timestamp.
        The string is parsed as a naive datetime, i.e. no timezone
        conversion is applied.

    Raises
    ------
    ValueError
        If the string does not match the expected format.
    """
    unix_origin = datetime(1970, 1, 1)
    parsed = datetime.strptime(datetime_string, "%Y-%m-%d %H:%M")
    elapsed = parsed - unix_origin
    return elapsed.total_seconds()


def ARSO_datetime_to_epoch_array(datetime_strings):
    """Convert every ARSO timestamp in an iterable to epoch seconds.

    Parameters
    ----------
    datetime_strings : iterable of str
        Timestamps in the format accepted by ``ARSO_datetime_to_epoch``.

    Returns
    -------
    list of float
        One epoch value per input string, in input order.
    """
    epochs = []
    for stamp in datetime_strings:
        epochs.append(ARSO_datetime_to_epoch(stamp))
    return epochs


def borzen_datetime_to_epoch(datetime_string):
    """Convert a Borzen datetime string into epoch time in seconds.

    Borzen data labels the hours of a day 1..24, whereas ``strptime`` only
    accepts 0..23. Hour ``24`` of a day is therefore interpreted as 00:00 of
    the *following* day, which keeps the timestamps of consecutive hours
    strictly increasing (hour 23 -> 23:00, hour 24 -> next day 00:00).

    Parameters
    ----------
    datetime_string : str
        Timestamp formatted as ``DD.MM.YYYY H`` (day.month.year hour).
        Surrounding whitespace is ignored.

    Returns
    -------
    float
        Seconds elapsed between 1970-01-01 00:00 and the timestamp. The
        string is parsed as a naive datetime; no timezone conversion is done.

    Raises
    ------
    ValueError
        If the string cannot be parsed, even after the hour-24 correction.
    """
    fmt = "%d.%m.%Y %H"
    text = datetime_string.strip()
    # Extra seconds added when hour "24" has to roll over into the next day.
    day_rollover = 0.0
    try:
        utc_time = datetime.strptime(text, fmt)
    except ValueError:
        if not text.endswith("24"):
            # Fail loudly instead of continuing with an undefined timestamp.
            raise ValueError("Error in datetime parsing: " + datetime_string)
        # Parse as 00:00 of the same day, then shift by one full day.
        utc_time = datetime.strptime(text[:-2] + "00", fmt)
        day_rollover = 24 * 3600.0
    return (utc_time - datetime(1970, 1, 1)).total_seconds() + day_rollover

# Library

In [54]:
class Station:
    """Measurements of a single weather station.

    Attributes
    ----------
    station_id : identifier of the station as read from the data file.
    station_name : human-readable station name.
    header : names of the measurement columns stored in ``data``.
    data : measurement rows; any object supporting ``len()``.
    size : number of rows in ``data`` at construction time.
    data_ts : epoch timestamps belonging to the rows of ``data``.
    """

    def __init__(self, station_id, station_name, header, data, data_timestamp_epoch):
        """Store the station metadata, its data and the matching timestamps."""
        self.station_id = station_id
        # Legacy misspelled alias kept so existing code that reads
        # ``station_is`` keeps working.
        self.station_is = station_id
        self.station_name = station_name
        self.header = header
        self.data = data
        self.size = len(data)
        self.data_ts = data_timestamp_epoch

    def __str__(self):
        """Return '<station id> <station name> size=<row count>'."""
        return "{} {} size={}".format(self.station_id, self.station_name, self.size)

    def __repr__(self):
        """Use the same short summary as ``__str__`` for interactive display."""
        return self.__str__()

In [102]:
class Prices:
    """Container pairing a price series with its timestamps.

    Attributes
    ----------
    prices : the price values, stored exactly as passed in.
    timestamps : the epoch timestamps, stored exactly as passed in.
    """

    def __init__(self, prices, prices_timestamp_epoch):
        """Keep references to both series; nothing is copied or validated."""
        self.timestamps = prices_timestamp_epoch
        self.prices = prices

In [127]:
def test_load_ksedlo():
    """Load weather data for Korenjsko sedlo that has been dowloaded 
    from ARSO website. It removes lines that have any NaN values."""
    print('Start loading korenjsko sedlo')
    import csv
    test_file_ksedlo = '/Users/ales/Desktop/STUDIJ/DodatnoLeto/MatematikaZracunalnikom/WeatherDataTest/KorenjskoSedloTest'
    # open file and read content in a csv format
    csvfile = open(test_file_ksedlo, 'r')
    reader = csv.reader(csvfile, delimiter=',')
    data = list(reader)
    # extract header and clean up data by removing empty lines
    header_line = [el.strip() for el in data[0]]
    station_id = header_line[0]
    station_name = header_line[1]
    header = header_line[2:]
    # there are empty lines generated by parsing that needs to be removed
    data_full = data[2:len(data):2]
    data_empty = data[1:len(data):2]
    # check if 'empty' lines are really empty
    for lst in data_empty:
        if len(lst) != 0:
            # there is a non empty line
            print("Warning: not empty line " + lst)
            return None
    # remove station_id column, station_name column and datatime 
    # and create numpy matrix with floats
    data_clean = []
    for line in data_full:
        try:
            line_clean_float = list(map(float, line[3:]))
            data_clean.append(line_clean_float)
        except:
            # lines that have missing values are removed
            pass
    data_clean_np = np.array(data_clean)
    # extract datetime and convert it into timestamp epoch
    data_timestamps = np.array([[ARSO_datetime_to_epoch(line[2])] 
                                for line in data_full])
    # finish
    print('Loading successful')
    return Station(station_id, station_name, header, data_clean_np, data_timestamps)

In [123]:
def test_load_borzen_price(file_name=None):
    """Load Borzen power prices from a semicolon-separated CSV export.

    The first nine rows of the file are skipped (the column header is row 7),
    so data rows start at row index 9. In each data row, column 0 is the
    Borzen datetime and column 1 the SIPX price written with a decimal comma.

    Parameters
    ----------
    file_name : str, optional
        Path of the CSV file. Defaults to the original local test file.

    Returns
    -------
    Prices
        Prices and their epoch timestamps as numpy arrays of equal length.
    """
    print("Load test borzen prices.")
    if file_name is None:
        file_name = '/Users/ales/Desktop/STUDIJ/DodatnoLeto/MatematikaZracunalnikom/Borzen/Cena/Cp_in_Cn_2017_/01 2017-Table 1.csv'
    # The context manager closes the file even if parsing fails.
    with open(file_name, 'r') as csvfile:
        data = list(csv.reader(csvfile, delimiter=';'))
    # index of the first data row; everything before it is preamble/header
    first_data_row = 9
    # extract datetime and convert it into timestamp epoch; extract also SIPX values;
    # warning, SIPX values use comma as decimal marker
    timestamps = []
    prices = []
    for line in data[first_data_row:]:
        if not line:
            # blank lines in the file produce empty rows; nothing to load
            continue
        timestamps.append(borzen_datetime_to_epoch(line[0]))
        prices.append(float(line[1].strip().replace(",", ".")))
    # convert lists into numpy arrays
    timestamps = np.array(timestamps)
    prices = np.array(prices)
    # finish
    print("Loading test borzen prices successful!")
    # TODO
    return Prices(prices, timestamps)

# Load Data

In [128]:
# Load the Korenjsko sedlo weather data into a Station object.
# Power prices are not loaded here; that happens in a later cell.
ksedlo_station = test_load_ksedlo()
# Disabled consumption loading; DataLoader is not defined in the visible cells.
#consumption = DataLoader.test_load_borzen_consumption()

Start loading korenjsko sedlo
Loading successful


In [129]:
# Inspect the loaded data. Only the last expression of a cell is displayed,
# so the output below is the row count (size); the data matrix is not shown.
# NOTE(review): no columns are actually removed in this cell.
ksedlo_station.data
ksedlo_station.size

1440

# ARIMA 

In [130]:
# Load the Borzen price data; the result is a Prices object exposing
# the .prices and .timestamps arrays.
prices = test_load_borzen_price()

Load test borzen prices.
Loading test borzen prices successful!


In [131]:
# Display the loaded price values (numpy array).
prices.prices

array([  53.  ,   52.  ,   51.  ,   47.27,   45.49,   17.67,   11.1 ,
         15.47,   16.88,   35.1 ,   35.1 ,   48.02,   49.57,   48.69,
         46.91,   49.43,   50.45,   53.3 ,   59.07,   61.71,   56.26,
         55.  ,   54.  ,   52.  ,   51.8 ,   32.7 ,   28.11,   27.89,
         27.73,   32.6 ,   47.01,   57.67,   64.15,   62.5 ,   61.7 ,
         61.2 ,   55.69,   55.67,   61.19,   59.71,   60.96,   73.55,
         63.1 ,   63.1 ,   70.11,   59.69,   54.  ,   49.  ,   36.02,
         34.98,   33.64,   33.89,   33.5 ,   46.74,   55.  ,   59.73,
         61.81,   61.81,   61.94,   63.8 ,   57.49,   56.74,   60.  ,
         64.01,   68.94,   67.01,   61.99,   58.5 ,   60.  ,   59.48,
         63.  ,   56.74,   62.36,   56.04,   54.1 ,   52.  ,   51.36,
         53.02,   62.4 ,   68.  ,   73.01,   73.  ,   72.85,   72.51,
         66.42,   69.16,   69.  ,   67.13,   68.26,   72.15,   76.8 ,
         71.01,   69.84,   64.74,   62.36,   60.62,   51.22,   48.41,
         50.  ,   47