In [66]:
import os
from pathlib import Path
import pandas as pd
from dateutil.relativedelta import relativedelta
import random
import datetime

In [75]:
def load_spreadsheet(csv_path):
    """Read one semicolon-separated price file and keep its "O" and "C" columns.

    The file is read without a header row and its columns are labelled
    ``O, H, L, C, V``. ``parse_dates=True`` asks pandas to parse the index
    as dates.

    NOTE(review): this presumably relies on the file having one more column
    than the five names given, so that pandas uses the leading column as the
    index -- confirm against the actual CSV layout.

    Parameters
    ----------
    csv_path : path or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``"O"`` and ``"C"``, in that order.
    """
    column_names = ["O", "H", "L", "C", "V"]
    frame = pd.read_csv(
        csv_path,
        sep=";",
        header=None,
        names=column_names,
        parse_dates=True,
    )
    kept_columns = ["O", "C"]
    return frame[kept_columns]

def random_timestamp(start, end):
    """Draw a random instant between ``start`` and ``end`` at whole-second steps.

    The offset added to ``start`` is an integer number of seconds drawn
    uniformly from ``[0, int((end - start).total_seconds())]`` with the
    module-level ``random`` generator, so results are reproducible after
    ``random.seed(...)``.

    Parameters
    ----------
    start, end : datetime-like
        Interval bounds. ``end - start`` must provide ``total_seconds()`` and
        ``start`` must support adding a ``datetime.timedelta``
        (``datetime.datetime`` and ``pandas.Timestamp`` both do).

    Returns
    -------
    datetime-like
        ``start`` shifted forward by the drawn number of seconds.

    Raises
    ------
    ValueError
        If ``end`` lies one second or more before ``start``.
    """
    span_seconds = int((end - start).total_seconds())
    if span_seconds < 0:
        # random.randint would raise ValueError here as well, but with an
        # "empty range" message that does not point at the real cause.
        raise ValueError(
            f"end ({end}) must not be earlier than start ({start})"
        )
    offset = random.randint(0, span_seconds)
    # A fixed number of seconds needs no calendar arithmetic, so the stdlib
    # timedelta is sufficient.
    return start + datetime.timedelta(seconds=offset)


class DataManager:
    """Load per-currency price tables from disk and draw random positions in them.

    Layout expected by the code: ``data_path`` contains one sub-directory per
    currency, and each sub-directory holds ``*.csv`` files readable by
    ``load_spreadsheet``.
    """

    # Default horizon table: label -> length.
    # NOTE(review): the values look like minutes ("1H" -> 60) -- confirm.
    # "8H" was 420 (seven hours) in the original; every other entry equals
    # hours * 60, so it is corrected to 480.
    DEFAULT_HORIZONS = {"1M": 1, "5M": 5, "15M": 15,
                        "30M": 30, "1H": 60, "2H": 120,
                        "4H": 240, "8H": 480, "12H": 720,
                        "16H": 960, "24H": 1440}

    def __init__(self,
                 data_path = "./reduced",
                 horizons = None,
                 channel_size = 256):
        """Record configuration; nothing is read from disk until ``load_all``.

        Parameters
        ----------
        data_path : str or path-like
            Directory containing one sub-directory per currency.
        horizons : dict, optional
            Mapping of horizon label to length. When omitted, each instance
            gets its own copy of ``DEFAULT_HORIZONS`` so that mutating it
            does not leak into other instances. A dict passed by the caller
            is stored as-is.
        channel_size : int
            Stored on the instance; not used by the code in this class yet.
        """
        self.data_path = Path(data_path)
        # Kept for backward compatibility. This generator can be consumed only
        # once, so load_all() globs the directory afresh instead of using it.
        self.currency_gen = self.data_path.glob("./*")
        # currency name -> DataFrame returned by load_currency
        self.raw_data = {}
        # currency name -> [start, end] bounds used for random sampling
        self.start_end = {}
        self.horizons = dict(self.DEFAULT_HORIZONS) if horizons is None else horizons
        self.channel_size = channel_size

    def get_sample(self, currency):
        """Draw a random row position for ``currency``.

        TODO: unfinished -- the position is computed but no sample is built
        from it yet, so this currently returns ``None``.
        """
        index = self.get_random_timestamp(currency)

    def load_currency(self, currency_path):
        """Load every ``*.csv`` directly inside ``currency_path`` into one table.

        Each file is read with ``load_spreadsheet``; the results are
        concatenated and sorted by index.

        Raises
        ------
        ValueError
            If the directory contains no ``.csv`` file. ``pd.concat`` raised
            the same exception type on an empty list, with a less helpful
            message.
        """
        # Sorted so the concatenation order does not depend on directory
        # listing order.
        frames = [load_spreadsheet(csv_file)
                  for csv_file in sorted(currency_path.glob("./*.csv"))]
        if not frames:
            raise ValueError(f"no .csv files found in {currency_path}")
        return pd.concat(frames).sort_index()

    def load_all(self):
        """Load every currency directory and compute its sampling window.

        Fills ``self.raw_data[name]`` with the loaded table and
        ``self.start_end[name]`` with ``[start, end]``, where ``start`` is 16
        months after the first index entry and ``end`` is 1 year before the
        last one.

        The directory is globbed on every call, so calling this method again
        reloads the data instead of silently doing nothing.
        """
        for currency in sorted(self.data_path.glob("./*")):
            # Only directories whose name contains no dot are treated as
            # currencies; this skips plain files and hidden/checkpoint dirs.
            if not currency.is_dir() or "." in currency.name:
                continue
            table = self.load_currency(currency)
            self.raw_data[currency.name] = table
            start = table.index.min() + relativedelta(months = 16)
            end = table.index.max() - relativedelta(years = 1)
            self.start_end[currency.name] = [start, end]

    def get_random_timestamp(self, currency):
        """Return the integer row position nearest to a random instant.

        A timestamp is drawn with ``random_timestamp`` inside the window
        stored in ``self.start_end[currency]`` and mapped to the position of
        the nearest entry in that currency's index. Despite the method name,
        the return value is a position, not a timestamp.

        Raises
        ------
        KeyError
            If ``currency`` has not been loaded by ``load_all``.
        """
        start, end = self.start_end[currency]
        random_time = random_timestamp(start, end)
        return self._nearest_position(self.raw_data[currency].index, random_time)

    @staticmethod
    def _nearest_position(index, timestamp):
        """Return the integer position in ``index`` of the entry nearest ``timestamp``.

        Uses ``Index.get_indexer`` because the ``method`` argument of
        ``Index.get_loc`` was removed in pandas 2.0.
        """
        return int(index.get_indexer([timestamp], method = "nearest")[0])

    def get_norm_params(self,
                        currency,
                        horizon):
        """Not implemented yet; currently returns ``None``."""
        pass

    def get_splits(self,
                  currency,
                  horizon):
        """Not implemented yet; currently returns ``None``."""
        pass
        

In [76]:
#tests
def norm_params_stability():
    """Placeholder test; not implemented yet and returns ``None``.

    NOTE(review): presumably intended to exercise
    ``DataManager.get_norm_params`` -- confirm.
    """
    return None
def splits_stability():
    """Placeholder test; not implemented yet and returns ``None``.

    NOTE(review): presumably intended to exercise
    ``DataManager.get_splits`` -- confirm.
    """
    return None
def spreadsheet_load():
    """Build a default ``DataManager``, load every currency, and return it
    together with its ``"AUDCAD"`` table.

    Returns
    -------
    tuple
        ``(manager, table)`` where ``table`` is ``manager.raw_data["AUDCAD"]``.
    """
    manager = DataManager()
    manager.load_all()
    return manager, manager.raw_data["AUDCAD"]

In [77]:
# Build a DataManager, load every currency, and keep the manager and the
# "AUDCAD" table as notebook globals used by the cells below.
dataManager, table = spreadsheet_load()

In [70]:
# Rough wall-clock timing: resample the first 368640 rows of column "C" at
# "1H" frequency (last value per bin, empty bins dropped). The resampled
# result is discarded; only the elapsed time is printed.
# NOTE(review): the "H" frequency alias is deprecated in pandas >= 2.2 in
# favour of "h".
now = datetime.datetime.now()
table["C"].iloc[0:368640].resample("1H").last().dropna()
print(datetime.datetime.now() - now)

0:00:00.011206


In [78]:
# Draw one random row position in the "AUDCAD" table. The method returns an
# integer position in the index, not a timestamp.
dataManager.get_random_timestamp("AUDCAD")

2508224

In [58]:
# Same sampling window that DataManager.load_all computes: from 16 months
# after the first index entry to 1 year before the last one.
minim = table.index.min()+relativedelta(months = 16)
maxim = table.index.max()-relativedelta(years = 1)

In [62]:
# Draw one random timestamp between minim and maxim.
random_timestamp(minim,maxim)

Timestamp('2013-04-25 11:02:40')

In [49]:
# Length of the sampling window in seconds.
(maxim - minim).total_seconds()

306201480.0