In [None]:
import pandas as pd
import numpy as np
import time
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Import static data

In [None]:
# Load the four static JSON exports (the file names suggest one range of weeks per file).
df1 = pd.read_json("./data/w01-18.json")
df2 = pd.read_json("./data/w19-34.json")
df3 = pd.read_json("./data/w35-49.json")
df4 = pd.read_json("./data/w50-52.json")

# Print each frame's (rows, columns) shape as a quick check that every file loaded.
print(df1.shape)
print(df2.shape)
print(df3.shape)
print(df4.shape)

In [None]:
# Stack the four partial frames, in file order, into a single frame.
df = pd.concat([df1, df2, df3, df4])

In [None]:
# Replace the column labels positionally; assumes the source has exactly these 8 columns in this order — TODO confirm.
df.columns = ["ts", "open", "high", "low", "close", "vol_btc", "vol_cur", "weighted_price"]

In [None]:
# Interpret "ts" as seconds since the Unix epoch and convert it to datetimes.
df["ts"] = pd.to_datetime(df["ts"], unit="s")
# Quick look at the size and both ends of the combined frame.
print(df.shape)
print(df.head())
print(df.tail())

#### First plot

In [None]:
# Line plot of the "open" column against the timestamp, on a wide 15x5 figure.
plt.figure(figsize=(15,5))
plt.plot(df["ts"], df["open"])

#### Select sub-range

In [None]:
# Keep every row strictly after 2017-03-01 00:00:00 (there is no upper bound,
# so despite the name this runs to the end of the data).
march = df[df.ts > "2017-03-01 00:00:00"]
print(march.head())
print(march.tail())

## Set timestamp as index

In [None]:
# Move "ts" from a regular column to the index (the column itself is dropped by set_index).
df = df.set_index("ts")
print(df.tail())

In [None]:
# Label-based slice on the datetime index using date strings.
df.loc["2017-03-01":"2017-03-10"]

## Set week number and weekday

In [None]:
# Demo: ISO week number and ISO weekday (Monday=1 ... Sunday=7) of a sample date.
date = datetime(2017, 12, 1)
_, week, weekday = date.isocalendar()

print("Week:", week)
print("Weekday: ", weekday)

#### re-add ts as a column

In [None]:
# Copy the index back into a "ts" column; from here on "ts" is both the index name and a column.
df["ts"] = df.index
df.tail()

### Functions
* getWeekNumber(date)
* getYear(date)
* getWeekday(date)
* getYearAndWeek(date)

In [None]:
def getWeekNumber(date):
    """Return the ISO 8601 week number of ``date`` (the second field of ``isocalendar()``)."""
    _iso_year, iso_week, _iso_weekday = date.isocalendar()
    return iso_week

def getYear(date):
    """Return the ISO 8601 year of ``date``.

    This is the year the ISO week belongs to, so it can differ from the
    calendar year around New Year.
    """
    iso_year, _iso_week, _iso_weekday = date.isocalendar()
    return iso_year

def getWeekday(date):
    """Return the ISO weekday of ``date``: 1 for Monday through 7 for Sunday."""
    _iso_year, _iso_week, iso_weekday = date.isocalendar()
    return iso_weekday

def getYearAndWeek(date):
    """Return the ISO year and zero-padded ISO week of ``date`` as one string.

    The result always has the form ``YYYYWW``, e.g. ``"201709"`` for week 9
    and ``"201710"`` for week 10.

    Args:
        date: any object exposing ``isocalendar()`` (``datetime``, pandas ``Timestamp``).

    Returns:
        str: ISO year followed by the two-digit ISO week number.
    """
    iso_year, iso_week, _iso_weekday = date.isocalendar()
    # Fixed-width formatting pads single-digit weeks only; a hand-written
    # "> 10" comparison wrongly padded week 10 as "010".
    return "%d%02d" % (iso_year, iso_week)

#### Add columns
* weekNumber
* weekday
* timeRef (ISO year + week number)

In [None]:
# Derive the ISO calendar columns from the timestamp column.
df["weekNumber"] = df["ts"].apply(getWeekNumber)
df["weekday"] = df["ts"].apply(getWeekday)
df["timeRef"] = df["ts"].apply(getYearAndWeek)
# "ts" is both the index name and a column at this point, so sort_values(by="ts")
# is ambiguous and raises ValueError in current pandas. The column is a copy of the
# index, so sorting on the index orders the rows by the same timestamps.
df = df.sort_index()
df.tail()

In [None]:



# Trim the frame so that only rows belonging to ISO weeks of 2017 remain:
#  - drop the leading rows of early January that still carry week number 52
#    (they belong to the last ISO week of the previous year)
#  - drop the trailing rows dated 2018

# "ts" is both the index name and a column, so sort on the index to avoid the
# ambiguity error that sort_values(by="ts") raises in current pandas.
df = df.sort_index()

nb2018 = df.loc["2018-01":].shape[0]
aa = df.loc["2017-01-01":"2017-01-10"]
aa = aa[aa["weekNumber"] == 52]
nbWrongFirst = aa.shape[0]

print("Nb2018: ", nb2018)
print("nbWrongFirst: ", nbWrongFirst)

# Use an explicit end position: a "-nb2018" stop becomes "-0" when there are no
# 2018 rows, which would select an empty frame instead of keeping everything.
df = df.iloc[nbWrongFirst:len(df) - nb2018]

print("Head: ",df.head())
print("Tail: ",df.tail())


## Day - get max & min percentages
Example with week 9 (2017-02-27 to 2017-03-05)

In [None]:
# Example with week 9
df_week = df[df.weekNumber == 9]

# Get monday, open & end
df_day = df_week[df_week["weekday"] == 1]
openDate = df_day.index[0].replace(hour=5)
endDate = openDate.replace(hour=21)
print("open at: ", openDate, ". End: ", endDate)

open = df_day.loc[openDate]["open"]
print("Open: ", open)

# Get data in the time range [open-end]
time_range = df_day[(df_day["ts"] < endDate) & (df_day["ts"] >= openDate)]
max = time_range["high"].max()
min = time_range["low"].min()
print("Max: ", max)
print("Min: ", min)

# Get percentage
percentageMax = (max - open)/open
percentageMin = (min - open)/open

print("Percentage max: ",(percentageMax*100).round(2), "%")
print("Percentage min: ",(percentageMin*100).round(2), "%")

## Dataframe with percentage by hours

In [None]:
def getPercentageByDay(df_day, beginHour, endHour):
    """Return the (high, low) hourly percentage series for one day.

    Thin wrapper that forwards its arguments unchanged to
    ``getHighAndLowPercentageByDay`` and hands back its two results.
    """
    high_and_low = getHighAndLowPercentageByDay(df_day, beginHour, endHour)
    high, low = high_and_low
    return high, low

def getHighAndLowPercentageByDay(df_day, beginHour, endHour):
    """Compute hourly high/low moves of one day relative to its opening price.

    The reference price is the "open" of the row whose timestamp is the day's
    first index entry with the hour replaced by ``beginHour``. For every hour
    in ``[beginHour, endHour]`` the row's "high" and "low" are converted to a
    percentage of that reference via ``getPercentage``.

    Args:
        df_day: frame for a single day, indexed by timestamp, with "open",
            "high" and "low" columns. Assumes one row per hour on the hour
            (rows are looked up by exact timestamp) — TODO confirm.
        beginHour: first hour of the window (inclusive).
        endHour: last hour of the window (inclusive).

    Returns:
        tuple: ``(high_day_serie, low_day_serie)``, two Series indexed
        ``0..n-1`` (one entry per hour) and named after the day's first timestamp.

    Raises:
        IndexError: if ``df_day`` is empty.
        KeyError: if a required hourly row is missing.
    """
    day_start = df_day.index[0]
    open_price = df_day.loc[day_start.replace(hour=beginHour), "open"]

    # Collect plain lists and build each Series once: Series.append was removed
    # in pandas 2.0 and growing a Series element by element is quadratic anyway.
    high_percentages = []
    low_percentages = []
    for hour in range(beginHour, endHour + 1):
        row_label = day_start.replace(hour=hour)
        high_percentages.append(getPercentage(open_price, df_day.loc[row_label, "high"]))
        low_percentages.append(getPercentage(open_price, df_day.loc[row_label, "low"]))

    # Explicit dtype keeps an empty window (endHour < beginHour) as float64.
    high_day_serie = pd.Series(high_percentages, name=day_start, dtype="float64")
    low_day_serie = pd.Series(low_percentages, name=day_start, dtype="float64")

    return high_day_serie, low_day_serie

def getLowPercentageByDay(df_day, beginHour, endHour):
    """Compute the hourly "low" moves of one day relative to its opening price.

    The reference price is the "open" of the row at the day's first timestamp
    with the hour replaced by ``beginHour``. Each hour in
    ``[beginHour, endHour]`` contributes ``getPercentage(open, low)``.

    Args:
        df_day: frame for a single day, indexed by timestamp, with "open" and
            "low" columns.
        beginHour: first hour of the window (inclusive).
        endHour: last hour of the window (inclusive).

    Returns:
        pandas.Series: percentages indexed ``0..n-1``, named after the day's
        first timestamp.
    """
    day_start = df_day.index[0]
    open_price = df_day.loc[day_start.replace(hour=beginHour), "open"]

    # The previous body appended to and returned an undefined name (day_serie),
    # so every call raised NameError; build the values in a list instead.
    low_percentages = []
    for hour in range(beginHour, endHour + 1):
        row_label = day_start.replace(hour=hour)
        low_percentages.append(getPercentage(open_price, df_day.loc[row_label, "low"]))

    low_day_serie = pd.Series(low_percentages, name=day_start, dtype="float64")
    return low_day_serie
    

def getPercentage(open, high):
    """Return the move from ``open`` to ``high`` as a percentage, rounded to 2 decimals.

    Despite the parameter name, ``high`` may be any price (callers also pass
    lows), so the result is negative when the price is below ``open``.

    Args:
        open: reference price (parameter name kept for backward compatibility
            even though it shadows the builtin inside this function).
        high: price to compare against the reference.

    Returns:
        ``(high - open) / open * 100`` rounded to two decimal places.
    """
    percentage = (high - open)/open
    # The builtin round() works for plain Python floats as well as numpy
    # scalars, whereas the .round() method exists only on numpy/pandas objects.
    percentage = round(percentage*100, 2)
    return percentage


In [None]:
### Test with Monday on Week 9
def __main__():
    """Build a two-row frame (highs, then lows) of hourly percentages for Monday of week 9.

    Reads the notebook-level ``df`` and uses the fixed window 06:00-18:00.
    """
    df_week = df[df.weekNumber == 9]
    df_day = df_week[df_week["weekday"] == 1]
    openHour = 6
    endHour = 18

    high, low = getPercentageByDay(df_day, openHour, endHour)
    # DataFrame.append was removed in pandas 2.0; building the frame from the
    # two Series gives the same layout (one row per Series, labelled by its name).
    df_p = pd.DataFrame([high, low])
    return df_p

__main__().tail()

In [None]:
def getPercentageByWeek(df_week, openHour, endHour):
    """Build per-day tables of hourly high/low percentages for one week.

    For each ISO weekday 1..7 the matching rows of ``df_week`` are passed to
    ``getPercentageByDay``; each day becomes one row of the result.

    Args:
        df_week: frame holding one week of rows, with a "weekday" column.
        openHour: first hour of the daily window (inclusive).
        endHour: last hour of the daily window (inclusive).

    Returns:
        dict: ``{"highs": DataFrame, "lows": DataFrame}``; rows are labelled by
        each day's first timestamp and columns by hour (``openHour..endHour``).
    """
    high_rows = []
    low_rows = []
    for weekday in range(1, 8):
        df_day = df_week[df_week["weekday"] == weekday]
        high_day, low_day = getPercentageByDay(df_day, openHour, endHour)
        high_rows.append(high_day)
        low_rows.append(low_day)

    # DataFrame.append was removed in pandas 2.0; assemble each frame in one step.
    # Every Series is named after its day, which becomes the row label.
    df_high_percentage = pd.DataFrame(high_rows)
    df_low_percentage = pd.DataFrame(low_rows)

    # Relabel the positional columns 0..n-1 with the actual hours of the window.
    df_high_percentage.columns = range(openHour, endHour+1)
    df_low_percentage.columns = range(openHour, endHour+1)

    return {"highs": df_high_percentage, "lows": df_low_percentage}

In [None]:
def __main__():
    """Return the high/low percentage tables of week 9 for the 05:00-18:00 window.

    Note: this redefines the ``__main__`` helper from the earlier day-level test cell.
    """
    week_nine = df[df.weekNumber == 9]
    return getPercentageByWeek(week_nine, 5, 18)

d = __main__()
d["lows"]

In [None]:
# Heatmap of the week's "lows" table held in d.
sns.heatmap(d["lows"])

In [None]:
def getHighLowDF(df, openHour, endHour):
    """Build the high/low percentage tables for week numbers 1 through 52.

    Args:
        df: frame with a "weekNumber" column plus whatever
            ``getPercentageByWeek`` needs.
        openHour: first hour of the daily window (inclusive).
        endHour: last hour of the daily window (inclusive).

    Returns:
        dict: ``{"highs": DataFrame, "lows": DataFrame}`` with the weekly
        tables stacked in week order.
    """
    weekly_highs = []
    weekly_lows = []
    for week_number in range(1, 53):
        df_week = df[df.weekNumber == week_number]
        df_week_percentage = getPercentageByWeek(df_week, openHour, endHour)
        weekly_highs.append(df_week_percentage["highs"])
        weekly_lows.append(df_week_percentage["lows"])

    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the accumulated frame on every iteration.
    return {"highs": pd.concat(weekly_highs), "lows": pd.concat(weekly_lows)}

# Notebook-level window settings; later cells read openHour / endHour as globals.
openHour = 6
endHour = 18
# Yearly tables: a dict with "highs" and "lows" frames.
df_year = getHighLowDF(df, openHour, endHour)
df_year["highs"].tail()

## Draw heatmaps

In [None]:
# Heatmap of the yearly "lows" table with the colour scale clamped to [-8, 0].
plt.figure(figsize=(20,30))
sns.heatmap(df_year["lows"], vmin=-8, vmax=0)

---
# Part 2. Compute the gain

In [None]:
def isRowWinning(df, index, openHour, endHour, high_rate, low_rate):
    """Classify one row by which threshold is crossed first, scanning hour by hour.

    Args:
        df: mapping with "highs" and "lows" frames whose columns are hours.
        index: label of the row to inspect in both frames.
        openHour: first hour scanned (inclusive).
        endHour: last hour scanned (inclusive).
        high_rate: a high strictly above this value counts as a win.
        low_rate: a low strictly below this value counts as a loss.

    Returns:
        int: 1 if the high threshold is crossed first, -1 if the low threshold
        is crossed first, 0 if neither is crossed in the window.
    """
    for hour in range(openHour, endHour + 1):
        hour_high = df["highs"].loc[index][hour]
        hour_low = df["lows"].loc[index][hour]
        # The high check runs first, so it wins when both cross in the same hour.
        if hour_high > high_rate:
            return 1
        if hour_low < low_rate:
            return -1
    return 0

In [None]:
# Test

high_rate = 1.4
low_rate = -8
fees_rate = 0.25/100

# One record per day: outcome flag, the "open" of the row at the day's label,
# and the day's extreme percentages. Records are collected in a list because
# DataFrame.append was removed in pandas 2.0.
day_labels = df_year["highs"].index
records = []
for index in day_labels:
    records.append({
        "win": isRowWinning(df_year, index, openHour, endHour, high_rate, low_rate),
        # Not stored in a variable named "open", which would shadow the builtin.
        "open": df.loc[index, "open"],
        "high": df_year["highs"].loc[index].max(),
        "low": df_year["lows"].loc[index].min(),
    })
df_wins = pd.DataFrame(records, index=day_labels, columns=["win", "open", "high", "low"])

# Count outcomes from the results; the counters used to stay at 0 because
# nothing ever incremented them.
nbWins = int((df_wins["win"] == 1).sum())
nbLoses = int((df_wins["win"] == -1).sum())

nbDays = df_year["highs"].shape[0]
missing = nbDays - (nbWins+nbLoses)
print(nbWins, nbLoses, nbWins + nbLoses,  missing)
print(df_wins.head())
plt.figure(figsize=(15,15))
df_wins.loc[:"2017-02-01", "win"].plot()

In [None]:
def getWins(df, openHour, endHour, high_rate, low_rate):
    """Classify every day and collect its outcome with the day's extremes.

    Args:
        df: mapping with "highs" and "lows" frames (same row labels, hour columns).
        openHour: first hour scanned (inclusive).
        endHour: last hour scanned (inclusive).
        high_rate: threshold passed to ``isRowWinning`` for a win.
        low_rate: threshold passed to ``isRowWinning`` for a loss.

    Returns:
        pandas.DataFrame: one row per row of ``df["highs"]`` (same labels) with
        columns "win" (result of ``isRowWinning``), "high" (row maximum of the
        highs) and "low" (row minimum of the lows).
    """
    day_labels = df["highs"].index
    # Collect records and build the frame once: DataFrame.append was removed in
    # pandas 2.0 and appending row by row is quadratic.
    records = [
        {
            "win": isRowWinning(df, index, openHour, endHour, high_rate, low_rate),
            "high": df["highs"].loc[index].max(),
            "low": df["lows"].loc[index].min(),
        }
        for index in day_labels
    ]
    # Explicit columns keep the schema stable even when there are no rows.
    return pd.DataFrame(records, index=day_labels, columns=["win", "high", "low"])


In [None]:
## Test

high_rate = 1.5
low_rate = -6
fees_rate = 0.25/100

df_wins = getWins(df_year,openHour, endHour,  high_rate, low_rate)

# Derive the counters from the outcomes; they used to be hard-wired to 0,
# which made the printed statistics meaningless.
nbWins = int((df_wins["win"] == 1).sum())
nbLoses = int((df_wins["win"] == -1).sum())

nbDays = df_year["highs"].shape[0]
# Days on which neither threshold was crossed.
missing = nbDays - (nbWins+nbLoses)
print(nbWins, nbLoses, nbWins + nbLoses,  missing)
print(df_wins.head())
plt.figure(figsize=(15,15))

df_wins.loc[:"2017-02-01", "win"].plot()

In [None]:
def getAmountAfterTransaction(amount, fees_rate, rate):
    """Return the net profit or loss of one buy-then-sell round trip.

    The fee is taken once on the buy and once on the sell; between the two the
    position changes by ``rate`` percent.

    Args:
        amount: capital put into the trade.
        fees_rate: fee per transaction as a fraction (0.0025 means 0.25 %).
        rate: price move between buy and sell, in percent.

    Returns:
        The final amount minus ``amount`` (negative for a loss).
    """
    invested = amount * (1 - fees_rate)
    position = invested * (1 + rate/100)
    proceeds = position * (1 - fees_rate)
    return proceeds - amount

In [None]:
## Test
# Net result of a 1000-unit round trip at high_rate (printed) and at low_rate (shown as the cell output).
high_rate = 1.9
low_rate = -4
fees_rate = 0.25/100
print(getAmountAfterTransaction(1000, fees_rate, high_rate))
getAmountAfterTransaction(1000, fees_rate, low_rate)

In [None]:
def getGain(df_wins, amount, fees_rate, high_rate, low_rate):
    """Sum the net result of one fixed-size trade per row of ``df_wins``.

    A row whose "win" value is below 1 is settled at ``low_rate``; every other
    row is settled at ``high_rate``. Rows with win == 0 are therefore settled
    at ``low_rate`` as well.

    Args:
        df_wins: frame with a "win" column.
        amount: capital used for every trade.
        fees_rate: fee per transaction as a fraction.
        high_rate: percentage applied to winning rows.
        low_rate: percentage applied to all other rows.

    Returns:
        Total of the per-row results from ``getAmountAfterTransaction``
        (0 when ``df_wins`` has no rows).
    """
    total = 0
    for _label, day in df_wins.iterrows():
        applied_rate = low_rate if day["win"] < 1 else high_rate
        total = total + getAmountAfterTransaction(amount, fees_rate, applied_rate)
    return total

In [None]:
# Test

# Strategy parameters for this run; note that openHour / endHour overwrite the
# notebook-level values set earlier, and df_year / df_wins are rebuilt with them.
amount = 1000
high_rate = 1.9
low_rate = -4
fees_rate = 0.25/100
openHour = 6
endHour = 19

df_year = getHighLowDF(df, openHour, endHour)
df_wins = getWins(df_year,openHour, endHour,  high_rate, low_rate)
# Gain over the rows from December 2017 onwards only.
getGain(df_wins["2017-12":], amount, fees_rate, high_rate, low_rate)
        

## Hyperparameter tuning

In [None]:
amount = 1000
high_rate = 1.9
low_rate = -4
fees_rate = 0.25/100
openHour = 6
endHour = 19

# Parameter grid.
low_rates = range(-2, -7, -1)
# Round away arange's floating-point drift (e.g. 1.2000000000000002) so that
# thresholds and row labels stay clean.
high_rates = np.round(np.arange(1.0, 2.0, 0.1), 1)
openHours = range(2,10,1)
endHours = range(16,18,1)  # not explored yet: endHour stays at the value set above

# The yearly percentage tables depend only on the hour window, not on the
# thresholds, so build them once per opening hour instead of in every iteration.
year_by_open_hour = {hour: getHighLowDF(df, hour, endHour) for hour in openHours}

gain_records = []
gain_labels = []
for low_rate in low_rates:
    for high_rate in high_rates:
        for openHour in openHours:
            # getHighLowDF returns a dict {"highs", "lows"}, which is exactly
            # what getWins expects as its first argument.
            df_year = year_by_open_hour[openHour]
            df_wins = getWins(df_year, openHour, endHour, high_rate, low_rate)
            gain = getGain(df_wins.loc["2017-12":], amount, fees_rate, high_rate, low_rate)

            index = str(low_rate)+"%>"+str(high_rate)+"%/"+str(openHour)+"h-"+str(endHour)+"h"
            print(index, gain,"$")
            gain_labels.append(index)
            gain_records.append({
                "gain": gain,
                "low_rate": low_rate,
                "high_rate": high_rate,
                "openHour": openHour,
                "endHour": endHour,
                "ticks": index,
            })

# Build the result frame once (DataFrame.append was removed in pandas 2.0).
df_gains = pd.DataFrame(
    gain_records,
    index=gain_labels,
    columns=["gain", "low_rate", "high_rate", "openHour", "endHour", "ticks"],
)

# Expose the tables of the last evaluated window under the names used by the
# exploration cells further down.
df_year_high = df_year["highs"]
df_year_low = df_year["lows"]

plt.figure(figsize=(20,10))
print("--")
print(df_gains.sort_values(by="gain")["gain"])
plot = df_gains["gain"].plot(rot=45)

## Explore with all year data

In [None]:
#for i in range(2,12):
#    plt.figure(figsize=(20,10))
#    sns.heatmap(df_year_high.loc["2017-"+str(i)+"-01":"2017-"+str(i+1)+"-01"], vmin=0, vmax=1.4)



In [None]:
# Plot the "win" outcome from December 2017 onwards, with one x tick per row.
plt.figure(figsize=(20,10))
dff = df_wins.loc["2017-12":, "win"] 
ax = dff.plot(xticks=dff.index, rot=45)
# Trailing semicolon suppresses the repr of the returned tick-label list.
ax.set_xticklabels(dff.index);

In [None]:
# Annotated heatmaps of the hourly highs and lows from 2017-12-01 to 2018-01-01.
# Read the frames from the df_year dict: df_year_high / df_year_low are never
# bound to DataFrames by the preceding cells, so using them fails here.
plt.figure(figsize=(20,10))
sns.heatmap(df_year["highs"].loc["2017-12-01":"2018-01-01"], vmin=0, vmax=1.4, annot=True)
plt.figure(figsize=(20,10))
sns.heatmap(df_year["lows"].loc["2017-12-01":"2018-01-01"], vmin=-7, vmax=0, annot=True)

In [None]:
# Plain line plot of the "win" outcome from December 2017 onwards (default ticks).
plt.figure(figsize=(20,10))
df_wins.loc["2017-12":,"win"].plot()