In [1]:
import boto3
import pandas as pd
import numpy as np
import io
import sys
import os
from joblib import Parallel, delayed
# local libraries
sys.path.append(os.path.abspath(os.path.join(sys.path[0],'..','src','lib')))
import market_maker_functions
# modeling
from sklearn import linear_model
from sklearn.utils import resample
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, classification_report
pd.options.mode.chained_assignment = None

In [37]:
%load_ext autoreload
%autoreload 2

In [57]:
%%time
# Pull recent trades for both coin pairs at once, one joblib worker per pair.
mm = market_maker_functions.MarketMaker()
coin_pairs = ('TRXETH', 'ETHUSDT')
trade_df_list = Parallel(n_jobs=2)(
    delayed(mm.pandas_get_trades)(coin_pair, '320 min ago UTC')
    for coin_pair in coin_pairs
)
# Results are in coin_pairs order: first the target pair, then the through pair.
target_df, through_df = trade_df_list
# Engineer target coin pair features
# Collapse the raw trades to one row per minute.
target_df = (
    target_df
    .groupby(['minute'])
    .agg({'avg_price': 'mean', 'quantity': 'mean', 'trade_count': 'count'})
    .reset_index()
)
price = target_df['avg_price']

# Look-back price and percent change versus 1, 5 and 10 rows earlier.
for lag, price_col, chg_col in (
    (1, 'prev_min_price', 'prev_min_perc_chg'),
    (5, 'prev_5_min_price', 'prev_5_min_perc_chg'),
    (10, 'prev_10_min_price', 'prev_10_min_perc_chg'),
):
    target_df[price_col] = price.shift(lag)
    target_df[chg_col] = (price - target_df[price_col]) / target_df[price_col] * 100

# Outcome: percent change from the current row to the row 10 positions ahead.
target_df['futr_10_min_price'] = price.shift(-10)
target_df['futr_10_min_perc_chg'] = (target_df['futr_10_min_price'] - price) / price * 100

# Trailing averages: the current row plus the five preceding rows, divided by 6
# (NaN until five earlier rows exist).
for source_col, avg_col in (
    ('avg_price', 'avg_5_min_price'),
    ('quantity', 'avg_5_min_qty'),
    ('trade_count', 'avg_5_min_trade_count'),
):
    current = target_df[source_col]
    target_df[avg_col] = sum((current.shift(k) for k in range(1, 6)), current) / 6

target_df = target_df.rename(columns={'quantity': 'avg_qty'})

# Engineer through coin pair features
# Same per-minute aggregation as the target pair.
through_df = (
    through_df
    .groupby(['minute'])
    .agg({'avg_price': 'mean', 'quantity': 'mean', 'trade_count': 'count'})
    .reset_index()
)
alt_price = through_df['avg_price']

# Look-back price and percent change versus 1, 5 and 10 rows earlier (alt_* columns).
for lag, price_col, chg_col in (
    (1, 'prev_min_price', 'alt_prev_min_perc_chg'),
    (5, 'prev_5_min_price', 'alt_prev_5_min_perc_chg'),
    (10, 'prev_10_min_price', 'alt_prev_10_min_perc_chg'),
):
    through_df[price_col] = alt_price.shift(lag)
    through_df[chg_col] = (alt_price - through_df[price_col]) / through_df[price_col] * 100

# Combine features
# Left join on minute: every target row is kept; alt_* columns are NaN where the
# through pair has no row for that minute.
alt_cols = ['minute', 'alt_prev_min_perc_chg', 'alt_prev_5_min_perc_chg', 'alt_prev_10_min_perc_chg']
features_df = target_df.merge(through_df[alt_cols], on='minute', how='left')

CPU times: user 144 ms, sys: 49.8 ms, total: 194 ms
Wall time: 17.8 s


In [43]:
# Display the merged feature table (target-pair features plus the through-pair alt_* columns).
features_df

Unnamed: 0,minute,avg_price,avg_qty,trade_count,prev_min_price,prev_min_perc_chg,prev_5_min_price,prev_5_min_perc_chg,prev_10_min_price,prev_10_min_perc_chg,futr_10_min_price,futr_10_min_perc_chg,avg_5_min_price,avg_5_min_qty,avg_5_min_trade_count,alt_prev_min_perc_chg,alt_prev_5_min_perc_chg,alt_prev_10_min_perc_chg
0,25418760,0.000137,10060.421429,140,,,,,,,0.000139,1.083341,,,,,,
1,25418761,0.000137,14840.293651,126,0.000137,-0.109493,,,,,0.000139,1.320998,,,,-0.048081,,
2,25418762,0.000136,11045.118357,414,0.000137,-0.859708,,,,,0.000138,1.453150,,,,0.037848,,
3,25418763,0.000134,6127.947154,492,0.000136,-1.366934,,,,,0.000139,3.313339,,,,0.355617,,
4,25418764,0.000135,8338.856000,375,0.000134,0.918271,,,,,0.000139,2.779132,,,,0.174453,,
5,25418765,0.000137,5798.159420,207,0.000135,1.392022,0.000137,-0.052823,,,0.000138,0.713871,0.000136,9368.466002,292.333333,0.078010,0.598800,
6,25418766,0.000139,4307.814815,189,0.000137,1.423087,0.000137,1.480626,,,0.000138,-0.812912,0.000137,8409.698233,300.500000,-0.046065,0.600830,
7,25418767,0.000140,4709.535714,112,0.000139,0.657662,0.000136,3.033816,,,0.000138,-1.240121,0.000137,6721.238577,298.166667,-0.060400,0.502029,
8,25418768,0.000141,9713.331126,151,0.000140,0.545993,0.000134,5.032093,,,0.000138,-1.987129,0.000138,6499.274038,254.333333,0.020628,0.166551,
9,25418769,0.000140,10608.542125,273,0.000141,-1.021194,0.000135,3.013567,,,0.000138,-1.154966,0.000139,7246.039867,217.833333,-0.003562,-0.011450,


In [47]:
# Predictor columns fed to the model, and the outcome column it is trained on.
feature_cols = ['avg_5_min_price','avg_5_min_qty','avg_5_min_trade_count','avg_qty','trade_count','avg_price',
               'prev_min_perc_chg','prev_5_min_perc_chg','prev_10_min_perc_chg',
               'alt_prev_min_perc_chg','alt_prev_5_min_perc_chg','alt_prev_10_min_perc_chg']
target_col = 'futr_10_min_perc_chg'
# Preview the training window: positional rows 10 .. -11. The first 10 rows lack the
# 10-row look-back values and the trailing rows lack the 10-row look-ahead outcome.
# (The previous stray `features_df.loc[:-10, feature_cols]` line was removed: its result
# was discarded, and `.loc` slices by label rather than by position.)
features_df.iloc[10:-11]

Unnamed: 0,minute,avg_price,avg_qty,trade_count,prev_min_price,prev_min_perc_chg,prev_5_min_price,prev_5_min_perc_chg,prev_10_min_price,prev_10_min_perc_chg,futr_10_min_price,futr_10_min_perc_chg,avg_5_min_price,avg_5_min_qty,avg_5_min_trade_count,alt_prev_min_perc_chg,alt_prev_5_min_perc_chg,alt_prev_10_min_perc_chg
10,25418770,0.000139,8144.819209,177,0.000140,-0.455238,0.000137,1.136764,0.000137,1.083341,0.000137,-1.532433,0.000139,7213.700401,184.833333,0.006326,-0.083070,0.515233
11,25418771,0.000139,8715.918033,122,0.000139,0.125360,0.000139,-0.157300,0.000137,1.320998,0.000136,-2.352921,0.000140,7699.993504,170.666667,-0.039175,-0.076182,0.524190
12,25418772,0.000138,8943.652174,161,0.000139,-0.730400,0.000140,-1.534123,0.000136,1.453150,0.000137,-1.084422,0.000139,8472.633063,166.000000,0.006071,-0.009721,0.492259
13,25418773,0.000139,22537.887097,62,0.000138,0.441547,0.000141,-1.636408,0.000134,3.313339,0.000137,-1.195128,0.000139,11444.024960,157.666667,0.005331,-0.025013,0.141496
14,25418774,0.000139,8987.645833,48,0.000139,0.396449,0.000140,-0.227577,0.000135,2.779132,0.000136,-2.601753,0.000139,11323.077412,140.500000,-0.092723,-0.114155,-0.125592
15,25418775,0.000138,16067.608247,97,0.000139,-0.645367,0.000139,-0.418140,0.000137,0.713871,0.000136,-2.007922,0.000139,12232.921766,111.166667,-0.012328,-0.132787,-0.215747
16,25418776,0.000138,6098.755102,98,0.000138,-0.114447,0.000139,-0.656645,0.000139,-0.812912,0.000136,-1.715600,0.000139,11891.911081,98.000000,0.009557,-0.084101,-0.160219
17,25418777,0.000138,5863.459459,37,0.000138,0.224119,0.000138,0.298582,0.000140,-1.240121,0.000135,-2.245443,0.000138,11416.501319,83.833333,-0.019828,-0.109977,-0.119688
18,25418778,0.000138,5816.617021,47,0.000138,-0.214524,0.000139,-0.356555,0.000141,-1.987129,0.000136,-1.739448,0.000138,10895.328793,64.833333,-0.060700,-0.175932,-0.200902
19,25418779,0.000138,4941.209677,62,0.000138,-0.180831,0.000139,-0.929505,0.000140,-1.154966,0.000136,-1.625162,0.000138,7962.549223,64.833333,-0.268918,-0.351980,-0.465733


In [34]:
# Inspect the last row of the feature table -- the row the model cell scores (features_df.iloc[-1]).
features_df.iloc[-1]

minute                      2.541877e+07
avg_price                   1.392500e-04
quantity                    1.496600e+04
trade_count                 1.400000e+01
prev_min_price              1.386503e-04
prev_min_perc_chg           4.325107e-01
prev_5_min_price            1.395175e-04
prev_5_min_perc_chg        -1.917388e-01
prev_10_min_price           1.354361e-04
prev_10_min_perc_chg        2.816050e+00
futr_10_min_price                    NaN
futr_10_min_perc_chg                 NaN
alt_prev_min_perc_chg      -5.558913e-03
alt_prev_5_min_perc_chg    -2.700982e-02
alt_prev_10_min_perc_chg   -3.845695e-02
Name: 130, dtype: float64

In [52]:
# Build model
# Train a degree-3 polynomial ridge regression on the historical rows, then predict the
# outcome (target_col) for the last row of the feature table.
train_df = features_df.iloc[10:-11] # hardcoded for futr_10_min outcome
X = train_df[feature_cols]
y = train_df[target_col]
score_df = features_df.iloc[-1]
X_scoring = score_df[feature_cols]  # kept as a Series so it can still be inspected on its own

poly = PolynomialFeatures(degree=3)
X_ = poly.fit_transform(X)
# A pandas Series cannot be reshaped directly (Series.reshape was deprecated and later
# removed), so build the single-sample 2-D input as a one-row frame with the same columns
# as X. Use transform(), not fit_transform(): the transformer was already fitted on X.
predict_ = poly.transform(X_scoring.to_frame().T)

# NOTE(review): `normalize=` was removed from RidgeCV in scikit-learn 1.2 -- confirm the
# scikit-learn version this notebook is pinned to before upgrading.
clf = linear_model.RidgeCV(alphas=(.00001,.0001,.001,.01,.1,1,10), normalize=True)
clf.fit(X_, y)
# One-element array: predicted futr_10_min_perc_chg for the scored row.
predicted_growth = clf.predict(predict_)

  # Remove the CWD from sys.path while we load stuff.


In [56]:
# Report whether the predicted 10-minute percent change clears the 0.4 threshold.
print('yup' if predicted_growth > .4 else 'nope')

nope


In [51]:
# Inspect the feature vector (last row, model feature columns only) that was passed to the model for scoring.
X_scoring

avg_5_min_price                0.000137
avg_5_min_qty               6045.314096
avg_5_min_trade_count         30.166667
avg_qty                     7235.272727
trade_count                   22.000000
avg_price                      0.000137
prev_min_perc_chg              0.001381
prev_5_min_perc_chg           -0.517011
prev_10_min_perc_chg          -0.821510
alt_prev_min_perc_chg          0.008351
alt_prev_5_min_perc_chg       -0.047288
alt_prev_10_min_perc_chg      -0.596803
Name: 130, dtype: float64

In [2]:
# When fetching data, pull the full 120-minute window first, then keep pulling aggregate trades
# from the last trade id until the minute rolls over, and only then return.
# This way every per-minute feature is computed from a complete minute of trades.
import boto3
import pandas as pd
import numpy as np
import io
import sys
import os
import time
from joblib import Parallel, delayed
# local libraries
sys.path.append(os.path.abspath(os.path.join(sys.path[0], '..', 'lib')))
import market_maker_functions
# modeling
from sklearn import linear_model
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, classification_report
pd.options.mode.chained_assignment = None

In [8]:
# Predictor columns used by the model; the *_acc (acceleration) columns are computed
# in this cell but currently left out of the model.
feature_col = [
    'avg_5_min_price', 'avg_5_min_qty', 'avg_5_min_trade_count',
    'avg_qty', 'trade_count', 'avg_price',
    'prev_min_perc_chg', 'prev_5_min_perc_chg', 'prev_10_min_perc_chg',
    # 'prev_5_min_perc_chg_acc', 'prev_10_min_perc_chg_acc',
    'alt_prev_min_perc_chg', 'alt_prev_5_min_perc_chg', 'alt_prev_10_min_perc_chg',
    # 'alt_prev_5_min_perc_chg_acc', 'alt_prev_10_min_perc_chg_acc',
]
target_col = 'futr_10_min_perc_chg'


# Get recent trade data for both target coin pair and through coin pair
mm = market_maker_functions.MarketMaker()
coin_pairs = ('TRXETH', 'ETHUSDT')
# The two pairs are fetched one after the other here; the joblib Parallel variant
# used earlier in this notebook is disabled in this cell.
lookback = '370 min ago UTC'
target_df = mm.pandas_get_trades('TRXETH', lookback)
through_df = mm.pandas_get_trades('ETHUSDT', lookback)

# Engineer target coin pair features
# Collapse the raw trades to one row per minute.
target_df = (
    target_df
    .groupby(['minute'])
    .agg({'avg_price': 'mean', 'quantity': 'mean', 'trade_count': 'count'})
    .reset_index()
)
price = target_df['avg_price']

# Look-back price and percent change versus 1, 5 and 10 rows earlier.
for lag, price_col, chg_col in (
    (1, 'prev_min_price', 'prev_min_perc_chg'),
    (5, 'prev_5_min_price', 'prev_5_min_perc_chg'),
    (10, 'prev_10_min_price', 'prev_10_min_perc_chg'),
):
    target_df[price_col] = price.shift(lag)
    target_df[chg_col] = (price - target_df[price_col]) / target_df[price_col] * 100

# "Acceleration": how far the 1-row change departs from the longer-window change,
# as a percentage of the 1-row change.
one_min_chg = target_df['prev_min_perc_chg']
for acc_col, chg_col in (
    ('prev_5_min_perc_chg_acc', 'prev_5_min_perc_chg'),
    ('prev_10_min_perc_chg_acc', 'prev_10_min_perc_chg'),
):
    target_df[acc_col] = (one_min_chg - target_df[chg_col]) / one_min_chg * 100

# Outcome variable
target_df['futr_10_min_price'] = price.shift(-10)
target_df['futr_10_min_perc_chg'] = (target_df['futr_10_min_price'] - price) / price * 100

# Trailing averages: the current row plus the five preceding rows, divided by 6
# (NaN until five earlier rows exist).
for source_col, avg_col in (
    ('avg_price', 'avg_5_min_price'),
    ('quantity', 'avg_5_min_qty'),
    ('trade_count', 'avg_5_min_trade_count'),
):
    current = target_df[source_col]
    target_df[avg_col] = sum((current.shift(k) for k in range(1, 6)), current) / 6

target_df = target_df.rename(columns={'quantity': 'avg_qty'})

# Engineer through coin pair features
# Same per-minute aggregation as the target pair.
through_df = (
    through_df
    .groupby(['minute'])
    .agg({'avg_price': 'mean', 'quantity': 'mean', 'trade_count': 'count'})
    .reset_index()
)
alt_price = through_df['avg_price']

# Look-back price and percent change versus 1, 5 and 10 rows earlier (alt_* columns).
for lag, price_col, chg_col in (
    (1, 'prev_min_price', 'alt_prev_min_perc_chg'),
    (5, 'prev_5_min_price', 'alt_prev_5_min_perc_chg'),
    (10, 'prev_10_min_price', 'alt_prev_10_min_perc_chg'),
):
    through_df[price_col] = alt_price.shift(lag)
    through_df[chg_col] = (alt_price - through_df[price_col]) / through_df[price_col] * 100

# "Acceleration" columns, relative to the 1-row change. The *_acc_bad column uses the
# same formula as alt_prev_5_min_perc_chg_acc and is not carried into features_df.
alt_one_min_chg = through_df['alt_prev_min_perc_chg']
for acc_col, chg_col in (
    ('alt_prev_5_min_perc_chg_acc_bad', 'alt_prev_5_min_perc_chg'),
    ('alt_prev_5_min_perc_chg_acc', 'alt_prev_5_min_perc_chg'),
    ('alt_prev_10_min_perc_chg_acc', 'alt_prev_10_min_perc_chg'),
):
    through_df[acc_col] = (alt_one_min_chg - through_df[chg_col]) / alt_one_min_chg * 100

# Combine features
# Inner join on minute keeps only minutes present for both pairs; remaining NaNs
# (e.g. from the shifts) are then replaced with 0.
alt_cols = ['minute', 'alt_prev_min_perc_chg', 'alt_prev_5_min_perc_chg', 'alt_prev_10_min_perc_chg',
            'alt_prev_10_min_perc_chg_acc', 'alt_prev_5_min_perc_chg_acc']
features_df = target_df.merge(through_df[alt_cols], on='minute', how='inner').fillna(0)

In [9]:
# Training window: positional rows 10 .. -11 of the feature table, shown for inspection.
train_df = features_df.iloc[10:-11]
train_df

Unnamed: 0,minute,avg_price,avg_qty,trade_count,prev_min_price,prev_min_perc_chg,prev_5_min_price,prev_5_min_perc_chg,prev_10_min_price,prev_10_min_perc_chg,...,futr_10_min_price,futr_10_min_perc_chg,avg_5_min_price,avg_5_min_qty,avg_5_min_trade_count,alt_prev_min_perc_chg,alt_prev_5_min_perc_chg,alt_prev_10_min_perc_chg,alt_prev_10_min_perc_chg_acc,alt_prev_5_min_perc_chg_acc
10,25421495,0.000133,2732.125000,8,0.000133,0.058313,0.000133,0.205431,0.000133,-0.028127,...,0.000133,-0.268220,0.000133,3704.350872,16.833333,0.012804,-0.046196,0.190354,-1386.696772,460.794217
11,25421496,0.000133,2713.952381,21,0.000133,-0.007190,0.000133,0.270427,0.000133,-0.006669,...,0.000133,-0.307858,0.000133,3586.292935,18.666667,0.006736,-0.065523,0.146283,-2071.752578,1072.777098
12,25421497,0.000133,4520.523810,21,0.000133,-0.091467,0.000133,0.172097,0.000133,-0.001203,...,0.000133,-0.161287,0.000133,3513.537644,19.166667,-0.011567,-0.099604,0.008215,171.018993,-761.113349
13,25421498,0.000133,2127.846154,26,0.000133,0.068058,0.000133,0.170806,0.000133,0.198939,...,0.000133,-0.212304,0.000133,2747.795337,16.833333,0.028132,-0.080193,0.049719,-76.733353,385.056656
14,25421499,0.000133,2821.136364,22,0.000133,-0.103311,0.000133,-0.075704,0.000133,0.030857,...,0.000133,-0.143851,0.000133,2914.787761,18.666667,0.057893,0.094019,0.109485,-89.117010,-62.402762
15,25421500,0.000133,3694.700000,20,0.000133,-0.095582,0.000133,-0.229394,0.000133,-0.024434,...,0.000133,-0.100641,0.000133,3101.713951,19.666667,0.079517,0.160786,0.114517,-44.015552,-102.204220
16,25421501,0.000133,3150.100000,20,0.000133,-0.063919,0.000133,-0.285997,0.000133,-0.016343,...,0.000133,0.044145,0.000133,3171.376451,21.666667,-0.002818,0.151218,0.085595,3137.185949,5465.673491
17,25421502,0.000133,6374.000000,26,0.000133,-0.041762,0.000133,-0.236388,0.000133,-0.064698,...,0.000133,0.166366,0.000133,3781.384388,22.500000,0.033774,0.196632,0.096832,-186.707633,-482.203412
18,25421503,0.000133,2917.333333,9,0.000133,0.021747,0.000133,-0.282558,0.000133,-0.112235,...,0.000133,0.131792,0.000133,3514.185975,20.500000,-0.048183,0.120188,0.039899,182.806256,349.438665
19,25421504,0.000133,2497.583333,12,0.000133,0.025924,0.000133,-0.153555,0.000133,-0.229143,...,0.000133,0.083185,0.000133,3575.808838,18.166667,0.033646,0.095927,0.190036,-464.806950,-185.103807


In [10]:
# Build model
# Train a standardized, L2-penalised SGD regression on the historical rows, then predict
# the outcome (target_col) for the last row of the feature table.
train_df = features_df.iloc[10:-11] # hardcoded for futr_10_min outcome
X = train_df[feature_col]
y = train_df[target_col]
score_df = features_df.iloc[-1]
X_scoring = score_df[feature_col]  # kept as a Series so it can still be inspected on its own
# standardize and model
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)
# A pandas Series cannot be reshaped directly (Series.reshape was deprecated and later
# removed), so build the single-sample 2-D input as a one-row frame with the same columns
# the scaler was fitted on.
X_ = scaler.transform(X_scoring.to_frame().T)
# random_state pins SGD's shuffling so the same data always yields the same prediction.
model = linear_model.SGDRegressor(penalty='l2', alpha=0.15, max_iter=1000, random_state=0)
model.fit(X, y)
# One-element array: predicted futr_10_min_perc_chg for the scored row.
predicted_growth = model.predict(X_)

  # This is added back by InteractiveShellApp.init_path()


In [11]:
# Show the model's prediction of futr_10_min_perc_chg (a percent change) for the scored row.
predicted_growth

array([-0.12301168])