In [1]:
import itertools
import math
import multiprocessing
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_ta as ta
import seaborn as sns
from tqdm import tqdm

In [2]:
def get_df():
    """Load the price data and keep only the columns the analysis needs.

    Reads ``data.parquet`` (relative to the current working directory) with
    the pyarrow engine.

    Returns
    -------
    pandas.DataFrame
        The loaded data restricted to the ``time`` and ``close`` columns.
    """
    prices = pd.read_parquet('data.parquet', engine='pyarrow')
    return prices[['time', 'close']]

In [3]:
def add_sma(df, SMA_short, SMA_long):
    """Return a new frame with short and long simple moving averages added.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; must contain a ``close`` column. It is left unmodified.
    SMA_short, SMA_long : int
        Window lengths handed to ``ta.sma`` for the ``SMA_short`` and
        ``SMA_long`` columns respectively.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the two SMA columns appended, every row that
        contains a missing value dropped (presumably the SMA warm-up rows),
        and the index reset to 0..n-1.
    """
    close = df['close']

    # assign() builds a new frame instead of writing the columns into the
    # caller's object. The caller may pass a frame that is shared between
    # many evaluations, and writing into a column slice can also trigger
    # pandas' SettingWithCopyWarning.
    with_sma = df.assign(
        SMA_short=ta.sma(close, SMA_short),
        SMA_long=ta.sma(close, SMA_long),
    )

    return with_sma.dropna().reset_index(drop=True)

In [4]:
def add_signals(df):
    """Mark SMA crossovers on ``df`` with the close price at the crossing row.

    A buy is flagged where ``SMA_short`` is at or above ``SMA_long`` on the
    current row but was strictly below it on the previous row; a sell is
    flagged for the opposite transition. The first row never produces a
    signal because its shifted (previous) values are NaN.

    The columns ``signals_buy`` and ``signals_sell`` are written into ``df``
    itself: they hold ``close`` on signal rows and NaN elsewhere. The same
    ``df`` object is returned.
    """
    fast, slow = df['SMA_short'], df['SMA_long']
    fast_prev, slow_prev = fast.shift(1), slow.shift(1)

    crossed_up = (fast_prev < slow_prev) & (fast >= slow)
    crossed_down = (fast_prev >= slow_prev) & (fast < slow)

    # Assigning the masked subset relies on index alignment to leave NaN on
    # every row that is not a signal.
    for column, mask in (('signals_buy', crossed_up), ('signals_sell', crossed_down)):
        df[column] = df.loc[mask, 'close']

    return df

In [5]:
def get_signals_evaluation(df):
    """Summarise the buy/sell signals in ``df`` as aggregate profit figures.

    Only the non-NaN entries of ``signals_buy`` and ``signals_sell`` are used.

    Returns
    -------
    dict
        ``profit_multiplier``: mean sell price divided by mean buy price.
        ``buy_sell_trade_pair_count``: half of the total number of signals
        (fractional when the buy and sell counts differ).
        ``overall_profit_multiplier``: ``(profit_multiplier - 1)`` scaled
        linearly by the pair count, plus 1.
    """
    buy_prices = df["signals_buy"].dropna()
    sell_prices = df["signals_sell"].dropna()

    mean_ratio = sell_prices.mean() / buy_prices.mean()
    signal_total = buy_prices.count() + sell_prices.count()

    # TODO is overall_profit_multiplier calculated properly?
    # NOTE(review): this is a ratio of means extrapolated linearly over the
    # trades, not a compounded product of per-trade sell/buy ratios, and
    # signals are not paired (a leading sell or trailing buy is still
    # counted). Confirm which trading model is intended.
    summary = {}
    summary["profit_multiplier"] = mean_ratio
    summary["buy_sell_trade_pair_count"] = signal_total / 2
    summary["overall_profit_multiplier"] = (mean_ratio - 1) * signal_total / 2 + 1
    return summary

In [6]:
def get_range_product(start=2, stop=10, step=1, k = math.inf, seed=None):
    """Return ordered pairs of distinct values from ``range(start, stop, step)``.

    The candidate set is the Cartesian product of the range with itself,
    minus the pairs whose two members are equal (not useful for our use
    case). Both orderings of a pair, e.g. ``(2, 3)`` and ``(3, 2)``, are kept.

    Parameters
    ----------
    start, stop, step : int
        Arguments forwarded to ``range``.
    k : int or math.inf
        Number of pairs to draw without replacement. When ``k`` is at least
        the number of candidates (the default ``math.inf``), every candidate
        is returned.
    seed : optional
        When given, a private ``random.Random(seed)`` performs the draw so
        the result is reproducible. When ``None``, the module-level ``random``
        generator is used.

    Returns
    -------
    list of tuple
        The selected pairs, in random order (even when all are returned).
    """
    values = range(start, stop, step)

    # Range values are all distinct, so the 2-permutations are exactly the
    # product pairs with x != y, generated in the same order.
    value_pairs = list(itertools.permutations(values, 2))

    rng = random if seed is None else random.Random(seed)

    # filter out a sample
    return rng.sample(value_pairs, k=min(len(value_pairs), k))

In [7]:
# Loaded once at module level: evaluate_sma_combination reads this global, so
# every evaluation reuses the same frame instead of re-reading the parquet file.
df_raw = get_df()

def evaluate_sma_combination(sma_pair):
    """Evaluate one ``(SMA_short, SMA_long)`` window pair against ``df_raw``.

    Takes a single tuple argument so it can be mapped directly over a list of
    pairs. The SMA columns and crossover signals are computed from the
    module-level ``df_raw`` and then summarised.

    Returns
    -------
    dict
        ``SMA_short`` and ``SMA_long`` (the window lengths used) followed by
        every entry returned by ``get_signals_evaluation``.
    """
    sma_short, sma_long = sma_pair

    with_sma = add_sma(df_raw, sma_short, sma_long)
    with_signals = add_signals(with_sma)

    return {
        "SMA_short": sma_short,
        "SMA_long": sma_long,
        **get_signals_evaluation(with_signals),
    }

def evaluate_sma_combinations(sma_combinations):
    """Evaluate every SMA window pair in parallel, showing a progress bar.

    Parameters
    ----------
    sma_combinations : sequence of tuple
        Pairs to pass to ``evaluate_sma_combination``; must support ``len``
        so the progress bar knows the total.

    Returns
    -------
    list of dict
        One result per pair, in completion order (``imap_unordered``), which
        is not necessarily the input order.
    """
    with multiprocessing.Pool() as workers:
        completed = workers.imap_unordered(evaluate_sma_combination, sma_combinations)
        # Drain the iterator inside the ``with`` block, while the pool is alive.
        results_list = list(tqdm(completed, total=len(sma_combinations)))

    return results_list

In [8]:
# Parameters of the SMA window grid: every ordered pair of distinct window
# lengths from range(start, stop, step) is a candidate.
start = 2
stop = 41
step = 1
sample_size = math.inf  # math.inf -> evaluate every candidate pair, no sub-sampling

# Resolve the output path and create its directory *before* the multi-minute
# evaluation, so a missing "data/" folder cannot discard the finished results.
results_filename = f'data/results_{start}_{stop}_{step}_{sample_size}.pkl'
os.makedirs(os.path.dirname(results_filename), exist_ok=True)

sma_combinations = get_range_product(start, stop, step, sample_size)

results_list = evaluate_sma_combinations(sma_combinations)

# One row per evaluated (SMA_short, SMA_long) pair.
results = pd.DataFrame(results_list)

print(f"Saving to file: {results_filename}")
results.to_pickle(results_filename)

100%|██████████| 1482/1482 [04:12<00:00,  5.88it/s]


Saving to file: data/results_2_41_1_inf.pkl
