# 目的
`SQLBaseforFX`を継承する形で、解析結果を保存するテーブルを作成するためのクラスを用意する。   
解析メソッドなどは別途`FXAnalyzer`クラスに定義し、そのクラスで処理したものをデータベースに追加する。   
`FXAnalyzer`自身は、データベース用クラスとは独立に呼び出せるようにする。

In [1]:
import datetime
from FX.FX import SQLBaseforFX, OHLCanalyzer, SQLAnaforFX

In [2]:
# Subtracting two datetimes yields a timedelta.
a = datetime.datetime(1990, 12, 31, 23, 59, 59) - datetime.datetime(1900, 12, 31, 22, 59, 58)
a.days, a.seconds, a.total_seconds() # A timedelta exposes its days and seconds parts separately; total_seconds() is the convenient way to get one number.

(32872, 3601, 2840144401.0)

## クラスの定義

In [2]:
class SQLAnaforFX(SQLBaseforFX):
    """
    Database class that adds OHLC (open/high/low/close) tables on top of
    `SQLBaseforFX`.

    One table is prepared per bin size and per side, named
    ``ask<NN>min`` / ``bid<NN>min`` (e.g. ``ask05min``, ``bid60min``).
    """

    # Column definition shared by every OHLC table.
    _OHLC_SCHEMA = "(datetime varchar(255), open real, high real, low real, close real)"

    def __init__(self, *args, **kwargs):
        """
        Initialize the base class with the given arguments, then prepare the
        analysis tables.
        """
        super().__init__(*args, **kwargs)
        self.initialize_ana_tables()

    def initialize_ana_tables(self, recreate=False):
        """
        Initialize the tables related to analysis.

        Sets ``self._mins`` (supported bin sizes in minutes) and the matching
        table-name lists ``self._asktblnames`` / ``self._bidtblnames``, then
        calls `maketable` for every name (ask tables first, then bid tables).

        If `recreate` is True, then the tables will be dropped and recreated
        (the flag is passed through to `maketable`).
        """
        self._mins = [1, 5, 10, 15, 30, 60]
        self._asktblnames = ["ask{0:02d}min".format(n) for n in self._mins]
        self._bidtblnames = ["bid{0:02d}min".format(n) for n in self._mins]

        for tblname in self._asktblnames + self._bidtblnames:
            self.maketable(tblname, self._OHLC_SCHEMA, recreate)

    def addOHLCRecordFromDataFrame(self, df, minute, bid_or_ask):
        """
        Insert every row of an OHLC DataFrame into the matching table.

        Expected layout of `df`:
            df.index    -> datetimes
            df["open"], df["high"], df["low"], df["close"] -> prices

        `minute` is the bin size in minutes and `bid_or_ask` is "bid" or
        "ask"; together they select the destination table.

        According to the original note, this is meant for filling a table
        in bulk (e.g. making a test table after recording) rather than for
        row-by-row real-time recording.

        Raises ValueError if `minute` or `bid_or_ask` is not supported.
        """
        # `Series.as_matrix()` no longer exists in current pandas; `tolist()`
        # is available in old and new versions and yields native Python
        # scalars, which is what DB parameter binding expects.
        dataset = list(zip(
            list(df.index),
            df["open"].tolist(),
            df["high"].tolist(),
            df["low"].tolist(),
            df["close"].tolist(),
        ))

        tblname = self.__select_table(minute, bid_or_ask)
        insert_sql = '''insert into {} (datetime, open, high, low, close) values (?,?,?,?,?)'''.format(tblname)
        self.executemany(insert_sql, dataset)

    def addOHLCRecord(self, dataset, minute, bid_or_ask):
        """
        Add (insert) one OHLC set.

        dataset = (datetime, open, high, low, close)
        `minute` is the bin size in minutes and `bid_or_ask` is "bid" or
        "ask"; together they select the destination table.

        This method is used mainly for real-time recording.

        Raises ValueError if `minute` or `bid_or_ask` is not supported.
        """
        tblname = self.__select_table(minute, bid_or_ask)
        insert_sql = '''insert into {} (datetime, open, high, low, close) values (?,?,?,?,?)'''.format(tblname)
        self.execute(insert_sql, dataset)

    def __select_table(self, minute, bid_or_ask):
        """
        Return the table name for the given bin size and side.

        Raises ValueError when `bid_or_ask` is neither "ask" nor "bid", or
        when `minute` is not one of ``self._mins``.
        """
        if bid_or_ask == "ask":
            tblnames = self._asktblnames
        elif bid_or_ask == "bid":
            tblnames = self._bidtblnames
        else:
            # Refuse unknown sides instead of silently writing to a bid table.
            raise ValueError(
                "bid_or_ask must be 'bid' or 'ask', got {!r}".format(bid_or_ask))

        try:
            position = self._mins.index(minute)
        except ValueError:
            raise ValueError(
                "unsupported bin size {!r}; expected one of {}".format(minute, self._mins)
            ) from None
        return tblnames[position]

### クラスのテスト

In [2]:
sql = SQLAnaforFX(year_month="201704")

In [5]:
sql.execute("select name from sqlite_master where type='table'")

[('main',),
 ('ask01min',),
 ('ask05min',),
 ('ask10min',),
 ('ask15min',),
 ('ask30min',),
 ('ask60min',),
 ('bid01min',),
 ('bid05min',),
 ('bid10min',),
 ('bid15min',),
 ('bid30min',),
 ('bid60min',)]

In [6]:
sql.execute("select close from ask05min")

[(111.44,),
 (111.43,),
 (111.47,),
 (111.47,),
 (111.46,),
 (111.46,),
 (111.39,),
 (111.46,),
 (111.43,),
 (111.38,),
 (111.31,),
 (111.32,),
 (111.3,),
 (111.32,),
 (111.33,),
 (111.36,),
 (111.39,),
 (111.4,),
 (111.41,),
 (111.44,),
 (111.44,),
 (111.42,),
 (111.4,),
 (111.4,),
 (111.39,),
 (111.43,),
 (111.37,),
 (111.3,),
 (111.26,),
 (111.26,),
 (111.28,),
 (111.29,),
 (111.32,),
 (111.36,),
 (111.35,),
 (111.33,),
 (111.31,),
 (111.29,),
 (111.32,),
 (111.31,),
 (111.35,),
 (111.41,),
 (111.4,),
 (111.37,),
 (111.39,),
 (111.4,),
 (111.41,),
 (111.37,),
 (111.36,),
 (111.31,),
 (111.34,),
 (111.32,),
 (111.33,),
 (111.34,),
 (111.36,),
 (111.37,),
 (111.36,),
 (111.39,),
 (111.39,),
 (111.38,),
 (111.39,),
 (111.45,),
 (111.48,),
 (111.49,),
 (111.53,),
 (111.5,),
 (111.53,),
 (111.48,),
 (111.47,),
 (111.57,),
 (111.55,),
 (111.5,),
 (111.49,),
 (111.54,),
 (111.46,),
 (111.47,),
 (111.44,),
 (111.44,),
 (111.49,),
 (111.43,),
 (111.42,),
 (111.43,),
 (111.45,),
 (111.42,),
 

In [3]:
res = sql.toDataFrame()

In [18]:
len(res.index)/12

26896.833333333332

## FXanalyzer

In [4]:
ana = OHLCanalyzer(res)

40627
8126
4063
2709
1355
678


In [5]:
len(ana._datetimes[-1]),len(ana._is_weekdays[-1])

(678, 678)

In [6]:
# Show each timestamp of the last (coarsest) binning next to its weekday flag.
for stamp, weekday_flag in zip(ana._datetimes[-1], ana._is_weekdays[-1]):
    print(stamp, weekday_flag)

2017-04-01 01:52:13 True
2017-04-01 02:52:13 True
2017-04-01 03:52:13 True
2017-04-01 04:52:13 True
2017-04-01 05:52:13 True
2017-04-01 06:52:13 False
2017-04-01 07:52:13 False
2017-04-01 08:52:13 False
2017-04-01 09:52:13 False
2017-04-01 10:52:13 False
2017-04-01 11:52:13 False
2017-04-01 12:52:13 False
2017-04-01 13:52:13 False
2017-04-01 14:52:13 False
2017-04-01 15:52:13 False
2017-04-01 16:52:13 False
2017-04-01 17:52:13 False
2017-04-01 18:52:13 False
2017-04-01 19:52:13 False
2017-04-01 20:52:13 False
2017-04-01 21:52:13 False
2017-04-01 22:52:13 False
2017-04-01 23:52:13 False
2017-04-02 00:52:13 False
2017-04-02 01:52:13 False
2017-04-02 02:52:13 False
2017-04-02 03:52:13 False
2017-04-02 04:52:13 False
2017-04-02 05:52:13 False
2017-04-02 06:52:13 False
2017-04-02 07:52:13 False
2017-04-02 08:52:13 False
2017-04-02 09:52:13 False
2017-04-02 10:52:13 False
2017-04-02 11:52:13 False
2017-04-02 12:52:13 False
2017-04-02 13:52:13 False
2017-04-02 14:52:13 False
2017-04-02 15:52:

In [7]:
len(ana._time_edges[0]), sum(ana._is_weekdays[0]), ana._datetimes[0]

(40627,
 29107,
 array(['2017-04-01 00:53:13', '2017-04-01 00:54:13', '2017-04-01 00:55:13',
        ..., '2017-04-29 05:57:13', '2017-04-29 05:58:13',
        '2017-04-29 05:59:13'], 
       dtype='<U19'))

In [17]:
def is_weekend(dt):
    """
    Judge whether `dt` falls within the weekend.

    Here `weekend` corresponds to the range from 06:00:00 on Saturday
    (inclusive) to 06:00:00 on Monday (exclusive).

    `dt` must provide `weekday()` and `hour` (e.g. a `datetime.datetime`).
    Always returns a bool; Tuesday through Friday yield False.
    """
    weekday = dt.weekday()  # Monday is 0 ... Sunday is 6
    if weekday == 5:
        return dt.hour >= 6
    if weekday == 6:
        return True
    if weekday == 0:
        return dt.hour < 6
    # Tuesday-Friday: return an explicit False instead of falling through
    # to an implicit None.
    return False

import datetime
is_weekend(datetime.datetime.now()) == False

True

In [24]:
ana.calc_ohlc()

In [27]:
ana.asks_edge.shape

(6067, 4)

In [37]:
 len(np.arange(ana._timestamps_ary[0], ana._timestamps_ary[-1], 60))

40627

In [13]:
ana._time_edges

[array([  0.00000000e+00,   6.00000000e+01,   1.20000000e+02, ...,
          2.43744000e+06,   2.43750000e+06,   2.43756000e+06]),
 array([  0.00000000e+00,   3.00000000e+02,   6.00000000e+02, ...,
          2.43660000e+06,   2.43690000e+06,   2.43720000e+06]),
 array([  0.00000000e+00,   6.00000000e+02,   1.20000000e+03,
          1.80000000e+03,   2.40000000e+03,   3.00000000e+03,
          3.60000000e+03,   4.20000000e+03,   4.80000000e+03,
          5.40000000e+03,   6.00000000e+03,   6.60000000e+03,
          7.20000000e+03,   7.80000000e+03,   8.40000000e+03,
          9.00000000e+03,   9.60000000e+03,   1.02000000e+04,
          1.08000000e+04,   1.14000000e+04,   1.20000000e+04,
          1.26000000e+04,   1.32000000e+04,   1.38000000e+04,
          1.44000000e+04,   1.50000000e+04,   1.56000000e+04,
          1.62000000e+04,   1.68000000e+04,   1.74000000e+04,
          1.90800000e+05,   1.91400000e+05,   1.92000000e+05,
          1.92600000e+05,   1.93200000e+05,   1.93800000

In [None]:
ana.calc_ohlc(5)

In [None]:
asks, bids = ana.toDataFrame()

In [None]:
asks

In [None]:
bids.as_matrix()[-1]

### Plot

In [None]:
import matplotlib.finance as mpf
import matplotlib.dates as mdates
from matplotlib import ticker
from FX.FX import drawfigfunc as dff

In [None]:
# Draw a candlestick chart of the bid OHLC values for a 51-bin window.
# NOTE(review): index 1 of `_time_edges` has 300 s spacing in the output
# shown earlier in this notebook, i.e. 5-minute bins, while the label below
# and the plot title say "1min" -- confirm which one is intended.
time_edge = ana._time_edges[1]
start = 1000
end = start + 50
dff.makefig(5,5)
# 1min
ohlc = np.hstack(((time_edge/60/60)[:, None], ana.bids_edge)) # x-axis data as plain numbers (edges divided by 3600; presumably seconds -> hours)
# Keep only rows whose first OHLC column is non-zero (presumably drops empty bins -- TODO confirm).
ind = ana.bids_edge[:, 0] != 0
ohlc = ohlc[ind]
# Candle width is 80% of the spacing between the first two remaining x values.
mpf.candlestick_ohlc(plt.gca(), ohlc[start:end+1], width=0.8*np.diff(ohlc[:,0])[0], colorup='r', colordown='b')
# plt.plot((time_1min-time_1min[0])/60, sma_bid_1min.get(keys[0]), label=keys[0])
# plt.plot((time_1min-time_1min[0])/60, sma_bid_1min.get(keys[1]), label=keys[1])
# plt.plot((time_1min-time_1min[0])/60, sma_bid_1min.get(keys[2]), label=keys[2])
# plt.legend(loc=2)
# Four ticks: start, one third, two thirds and end of the plotted window.
plt.xticks([ohlc[start, 0], ohlc[start+(end-start)//3, 0], ohlc[start+(end-start)*2//3, 0], ohlc[end, 0]])
dff.arrangefig(xlabel="Time [hour]", ylabel="Bid [yen]", title="1min")

### データの追加

In [None]:
sql.addOHLCRecordFromDataFrame(asks, 5, "ask")

In [None]:
print("before")
sql.execute("select close from ask05min")

# 全ビンサイズでOHLCの計算

In [1]:
sql = SQLAnaforFX(year_month="201704")
sql.initialize_ana_tables(recreate=True)
print("after")
sql.execute("select close from ask05min")

res = sql.toDataFrame()

NameError: name 'SQLAnaforFX' is not defined

In [5]:
ana = OHLCanalyzer(res)

Process timestamps...
Make bins...


In [6]:
# For every bin size: compute the OHLC bars, then store the ask side followed
# by the bid side in the corresponding database tables.
for minute in ana._mins:
    ana.calc_ohlc(minute)
    asks, bids = ana.toDataFrame()
    for side, frame in (("ask", asks), ("bid", bids)):
        sql.addOHLCRecordFromDataFrame(frame, minute, side)

## 中身の確認

In [7]:
sql.execute("select close from ask10min")

[(111.43,),
 (111.47,),
 (111.46,),
 (111.46,),
 (111.38,),
 (111.32,),
 (111.32,),
 (111.36,),
 (111.4,),
 (111.44,),
 (111.42,),
 (111.4,),
 (111.43,),
 (111.3,),
 (111.26,),
 (111.29,),
 (111.36,),
 (111.33,),
 (111.29,),
 (111.31,),
 (111.41,),
 (111.37,),
 (111.4,),
 (111.37,),
 (111.31,),
 (111.32,),
 (111.34,),
 (111.37,),
 (111.39,),
 (111.39,),
 (111.48,),
 (111.53,),
 (111.53,),
 (111.47,),
 (111.55,),
 (111.49,),
 (111.46,),
 (111.44,),
 (111.49,),
 (111.42,),
 (111.45,),
 (111.43,),
 (111.44,),
 (111.42,),
 (111.43,),
 (111.34,),
 (111.35,),
 (111.34,),
 (111.31,),
 (111.29,),
 (111.31,),
 (111.33,),
 (111.34,),
 (111.38,),
 (111.39,),
 (111.39,),
 (111.48,),
 (111.48,),
 (111.46,),
 (111.46,),
 (111.45,),
 (111.46,),
 (111.41,),
 (111.43,),
 (111.47,),
 (111.33,),
 (111.3,),
 (111.32,),
 (111.3,),
 (111.32,),
 (111.33,),
 (111.34,),
 (111.38,),
 (111.38,),
 (111.39,),
 (111.43,),
 (111.37,),
 (111.37,),
 (111.23,),
 (111.19,),
 (111.22,),
 (111.09,),
 (110.74,),
 (110.71,)

In [8]:
sql.toDataFrame(tblname="ask30min")

select open,high,low,close from ask30min


Unnamed: 0,open,high,low,close
2017-04-01 01:22:13,111.43,111.49,111.40,111.46
2017-04-01 01:52:13,111.47,111.47,111.29,111.32
2017-04-01 02:22:13,111.32,111.41,111.28,111.40
2017-04-01 02:52:13,111.40,111.45,111.37,111.40
2017-04-01 03:22:13,111.40,111.44,111.25,111.26
2017-04-01 03:52:13,111.26,111.37,111.25,111.33
2017-04-01 04:22:13,111.33,111.41,111.28,111.41
2017-04-01 04:52:13,111.41,111.43,111.35,111.37
2017-04-01 05:22:13,111.37,111.37,111.30,111.34
2017-04-01 05:52:13,111.34,111.41,111.34,111.39


In [9]:
sql.toDataFrame(tblname="bid30min")

select open,high,low,close from bid30min


Unnamed: 0,open,high,low,close
2017-04-01 01:22:13,111.42,111.48,111.39,111.45
2017-04-01 01:52:13,111.46,111.46,111.28,111.31
2017-04-01 02:22:13,111.31,111.40,111.27,111.39
2017-04-01 02:52:13,111.39,111.44,111.36,111.39
2017-04-01 03:22:13,111.39,111.43,111.24,111.25
2017-04-01 03:52:13,111.25,111.36,111.24,111.32
2017-04-01 04:22:13,111.32,111.40,111.27,111.40
2017-04-01 04:52:13,111.40,111.42,111.34,111.36
2017-04-01 05:22:13,111.36,111.36,111.29,111.33
2017-04-01 05:52:13,111.33,111.40,111.33,111.38


In [10]:
sql.toDataFrame()

select dateval,ask,bid from main


Unnamed: 0,dateval,ask,bid
2017-04-01 00:52:13,2.017040e+13,111.43,111.42
2017-04-01 00:52:18,2.017040e+13,111.43,111.42
2017-04-01 00:52:24,2.017040e+13,111.43,111.42
2017-04-01 00:52:29,2.017040e+13,111.43,111.42
2017-04-01 00:52:34,2.017040e+13,111.43,111.42
2017-04-01 00:52:39,2.017040e+13,111.42,111.41
2017-04-01 00:52:44,2.017040e+13,111.41,111.40
2017-04-01 00:52:49,2.017040e+13,111.40,111.39
2017-04-01 00:52:54,2.017040e+13,111.40,111.39
2017-04-01 00:52:59,2.017040e+13,111.42,111.41


# テストコード

In [None]:
datetime.datetime(1990, 1, 1, 1, 1, 1) + datetime.timedelta(0, 365*86400)

In [None]:
ana._timestamps_ary // 86400

In [None]:
ana._timestamps_ary % 86400

In [None]:
(ana._timestamps_ary // 86400) * 86400 + ana._timestamps_ary % 86400 == ana._timestamps_ary

In [None]:
a = np.array(["0", "2", "3"])
a[np.array([True, True, False])][0]
