In [49]:
import pandas as pd
import numpy as np

In [50]:
df = pd.read_csv("avaxdata.csv")
df = df.drop(["Unnamed: 0", "symbol"], axis=1)
df = df.iloc[::-1]
df.head()

Unnamed: 0,id,price,quoteQty,isBestMatch,qty,time,isBuyerMaker
33079,163591857,96.89,60.0718,True,0.62,2022-04-04T07:24:58.226000,False
33078,163591858,96.89,42.6316,True,0.44,2022-04-04T07:25:01.652000,True
33077,163591859,96.9,775.2,True,8.0,2022-04-04T07:25:01.668000,False
33076,163591860,96.91,95.9409,True,0.99,2022-04-04T07:25:02.389000,False
33075,163591861,96.91,14.5365,True,0.15,2022-04-04T07:25:02.838000,False


In [51]:
df['price_SMA5'] = df["price"].rolling(5).mean()
df['price_SMA10'] = df["price"].rolling(10).mean()
df['qty_SMA5'] = df["qty"].rolling(5).mean()
df['qty_SMA10'] = df["qty"].rolling(10).mean()

df["pricechange"] = df["price"].pct_change()
df["quantitychange"] = df["qty"].pct_change()
df["targetshift"] = df["pricechange"].shift(-5) # maybe it has an effect later
df["target"] = np.sign(df["targetshift"])
df["time"] = pd.to_datetime(df["time"])
df.tail()

timeframeofwindow = (df.iloc[-1]["time"] - df.iloc[0]["time"]).total_seconds() / 60 / 60
print("we are looking at a timewindow of hours: ", timeframeofwindow)

we are looking at a timewindow of hours:  5.944690277777777


In [52]:
df.corr()["target"]

id               -0.005303
price             0.004375
quoteQty          0.003809
isBestMatch            NaN
qty               0.003844
isBuyerMaker     -0.086890
price_SMA5        0.003888
price_SMA10       0.003358
qty_SMA5          0.000898
qty_SMA10        -0.000153
pricechange       0.021191
quantitychange   -0.007166
targetshift       0.649543
target            1.000000
Name: target, dtype: float64

In [53]:
df = df.fillna(0)
# X = df.to_numpy()
df.replace(np.inf, 999, inplace=True)
df.replace(-np.inf, -999, inplace=True)

y = df["target"]
df.drop(["target", "time"], inplace = True, axis = 1)

In [54]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(df, y, test_size=0.2, shuffle=True)

In [55]:
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

reg = make_pipeline(StandardScaler(),SGDClassifier())
# reg = SGDClassifier()
reg.fit(x_train, y_train)

score = reg.score(x_test, y_test)
print("score is: ", score)

predlast = reg.predict(df.iloc[-1].to_numpy().reshape(1, -1))

print("for last one i predict: ", predlast)

score is:  0.9992442563482467
for last one i predict:  [0.]




In [56]:
# train it on all
reg.fit(df, y)

predictions = reg.predict(df)
np.unique(predictions, return_counts = True)

(array([-1.,  0.,  1.]), array([ 4660, 23821,  4599]))

In [57]:
def oneSim(combtup):
    lookbackpred, minhold = combtup
    money = 10000
    stocks = 0
    holdingfor = 0

    for i in range(lookbackpred, len(df)):
        if money <= 10 and stocks == 0:
            # we can skip
#             break
            # game over du spasst
            return lookbackpred, minhold, money
        crntPrice = df.iloc[i]["price"]
        crntPred = np.median(predictions[i-lookbackpred : i])
        if crntPred == 1 and money > 10 and stocks == 0:
            # buy
            amount = money / crntPrice * .95
            cost = amount * crntPrice * (1.00025) # commission
            money -= cost
            stocks = amount
            holdingfor = 0
        elif crntPred == -1 and stocks > 0 and holdingfor > minhold:
            win = amount * crntPrice * (1 - 0.00025)
            money += win
            stocks = 0
        holdingfor += 1
    # last day sell if own stocks
    if stocks > 0:
        win = amount * crntPrice * (1 - 0.00025)
        money += win
        stocks = 0

#     print("i have earned: ", money)
    return lookbackpred, minhold, money



In [58]:
from tqdm import tqdm
from multiprocessing import Pool

combinations = []
for lookbackpred in tqdm([1, 5, 10, 50, 100, np.random.randint(300), np.random.randint(300)]):
    for minhold in [1, 5, 10, 50, 100, np.random.randint(300), np.random.randint(300)]:
            combinations.append((lookbackpred, minhold))
print("trying out %d combinations" % len(combinations))
pool = Pool()
result = pool.map_async(oneSim, combinations)
result = result.get()
# money = oneSim(lookbackpred, minhold)
# print(lookbackpred, minhold,  money)
print(result)

100%|█████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 32513.98it/s]

trying out 49 combinations





[(1, 1, 5349.243300127558), (1, 5, 6395.568652707036), (1, 10, 6880.054690999394), (1, 50, 8530.226948628391), (1, 100, 9060.386140130111), (1, 2, 5646.5602934210265), (1, 254, 9487.011653197302), (5, 1, 10181.762732993806), (5, 5, 10173.992179603274), (5, 10, 10166.370434007613), (5, 50, 10069.931725310767), (5, 100, 10042.229927664386), (5, 71, 10042.80870768078), (5, 95, 10034.638953727284), (10, 1, 9983.840373240762), (10, 5, 9983.840373240762), (10, 10, 9976.999812359765), (10, 50, 9819.464764267817), (10, 100, 9746.113137901599), (10, 77, 9808.721267565192), (10, 74, 9808.721267565192), (50, 1, 10000), (50, 5, 10000), (50, 10, 10000), (50, 50, 10000), (50, 100, 10000), (50, 231, 10000), (50, 63, 10000), (100, 1, 10000), (100, 5, 10000), (100, 10, 10000), (100, 50, 10000), (100, 100, 10000), (100, 81, 10000), (100, 33, 10000), (181, 1, 10000), (181, 5, 10000), (181, 10, 10000), (181, 50, 10000), (181, 100, 10000), (181, 238, 10000), (181, 91, 10000), (215, 1, 10000), (215, 5, 1000

In [59]:
results = pd.DataFrame(result, columns=["lookback", "minhold", "win"]).sort_values("win", ascending = False)
results.head(20)

Unnamed: 0,lookback,minhold,win
7,5,1,10181.762733
8,5,5,10173.99218
9,5,10,10166.370434
10,5,50,10069.931725
12,5,71,10042.808708
11,5,100,10042.229928
13,5,95,10034.638954
36,181,5,10000.0
35,181,1,10000.0
24,50,50,10000.0


In [60]:
# calculate win per hour
# timeframeofwindow
bestwin = results.iloc[0]["win"] - 10000
winPerHour = bestwin / timeframeofwindow
winPctPerHour = winPerHour / 10000
print("du spasst hättest %.2f$ pro stunde gemacht oder %.2f pct" % (winPerHour, winPctPerHour * 100))

du spasst hättest 30.58$ pro stunde gemacht oder 0.31 pct


In [62]:

def hochrechnung(startinvest = 10000):
    monthly = []
    alle = []
    # winPctPerHour
    money = startinvest
    hoursinyear = 8760
    for i in range(hoursinyear):
        money *= (1 + winPctPerHour)
        alle.append(money)
        if i % 720 == 0: # every month
            monthly.append(money)
            print(f'month {int(i / 720)} has a win of {round(money,2)}$')
    return monthly, alle 
monthly, alle  = hochrechnung()

month 0 has a win of 10030.58$
month 1 has a win of 90353.13$
month 2 has a win of 813880.33$
month 3 has a win of 7331247.79$
month 4 has a win of 66038202.82$
month 5 has a win of 594857022.33$
month 6 has a win of 5358335961.74$
month 7 has a win of 48266664427.11$
month 8 has a win of 434775070386.23$
month 9 has a win of 3916354363264.79$
month 10 has a win of 35277624094311.36$
month 11 has a win of 317772766788670.75$
month 12 has a win of 2862424381034502.5$


In [68]:
# sabve model pipeline
import joblib
joblib.dump(reg, "results/modelavax.pickle", compress=9)
with open("results/bestcombination.csv", "w") as file:
    file.write("lookback,minhold,win\n")
    file.write(str(results.iloc[0]["lookback"]) + ","  + str(results.iloc[0]["minhold"]) + ","  + str(results.iloc[0]["win"]))