In [164]:
import numpy as np
import pandas as pd
from scipy.signal import find_peaks
import logging
import time
import sys

PROJECT_PATH = "/Users/shawn/Documents/personal/rsi_divergence_detector"
sys.path.append(PROJECT_PATH)
# Load the training data
# Replace 'training_data.csv' with your actual data file or DataFrame
df_total = pd.read_pickle(f'{PROJECT_PATH}/data/training_data.pickle')
divergence_data = pd.read_pickle(f"{PROJECT_PATH}/data/divergence_data.pickle")

In [165]:
# Test dataframes sliced
start_time = "2024-11-11"
df = df_total[df_total.timeframe == '15m']
df = df.loc[df.index >= start_time]
# df = df.loc[df.index <= '2024-11-27 00:00:00']
# df = df.loc[df.index >= '2024-11-22 12:00:00']
# df = df.loc[df.index <= '2024-11-22 16:15:00']


dd_filter = divergence_data.copy()
for key, value in dd_filter.items():
    value = value.loc[value.index >= start_time]
    value = value.sort_values('end_datetime').sort_index()
    dd_filter[key] = value
    print(f"{key} filtered data length = {len(value)}")


5m filtered data length = 251
15m filtered data length = 85
1h filtered data length = 28
4h filtered data length = 9
1d filtered data length = 4


## Adding different timeline divergence data

In [148]:
dd_filter['5m'].sort_index()[112:114]

Unnamed: 0_level_0,end_datetime,entry_datetime,entry_price,previous_peak_datetime,divergence,price_change,rsi_change,TP,SL,label,TP_percent,SL_percent,TP_/_SL,profit,div_5m,div_15m,div_1h,div_4h,div_1d
start_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2024-11-22 13:25:00,2024-11-22 14:55:00,2024-11-22 15:05:00,97487.97,2024-11-22 12:00:00,Bullish Divergence,1102.23,26.32,97819.98,97122.11,True,0.34,0.38,0.91,332.01,True,True,False,False,False
2024-11-22 17:15:00,2024-11-22 19:05:00,2024-11-22 19:15:00,99212.13,2024-11-22 14:55:00,Bearish Divergence,165.32,-0.45,98520.34,99384.62,False,0.7,0.17,4.01,-172.49,True,False,False,False,False


In [140]:
dd_filter['15m'][31:32]

Unnamed: 0_level_0,end_datetime,entry_datetime,entry_price,previous_peak_datetime,divergence,price_change,rsi_change,TP,SL,label,TP_percent,SL_percent,position,TP_vs_SL,TP_/_SL,profit
start_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2024-11-22 13:15:00,2024-11-22 14:45:00,2024-11-22 15:15:00,97951.99,2024-11-22 07:30:00,Bullish Divergence,933.42,19.24,97999.9,97122.11,True,0.05,0.85,False,0.057731,0.06,47.91


In [141]:
divergence_data = dd_filter.copy()

"""
Compare divergences across different timeframes and mark them accordingly.

For each divergence in a base timeframe, mark the corresponding aligned divergence
in other timeframes based on the alignment of start_datetime.

Parameters:
- divergence_data (dict): A dictionary where keys are timeframe strings (e.g., '5m', '1h')
    and values are Pandas DataFrames indexed by start_datetime.

Returns:
- dict: The updated divergence_data with additional boolean columns indicating divergences
    across different timeframes.
"""
# Define the timeframe to frequency mapping
timeframe_to_freq = {
    '1m': '1T',
    '5m': '5T',
    '15m': '15T',
    '30m': '30T',
    '1h': '1H',
    '4h': '4H',
    '1d': 'D'
}
timeframes = ['1m','5m','15m','30m','1h','4h','1d']

# Step 0: Validation and Preparation
for timeframe, df in divergence_data.items():
    # Check if timeframe is defined in the mapping
    if timeframe not in timeframe_to_freq:
        raise ValueError(f"Timeframe '{timeframe}' is not defined in the frequency mapping.")

    # Ensure the index is a DatetimeIndex
    if not isinstance(df.index, pd.DatetimeIndex):
        raise TypeError(f"The index of dataframe '{timeframe}' must be a DatetimeIndex.")

    # Ensure 'divergence' column exists
    if 'divergence' not in df.columns:
        raise KeyError(f"The dataframe for timeframe '{timeframe}' must contain a 'divergence' column.")

    # Check for duplicate indices
    # if df.index.duplicated().any():
    #     raise ValueError(f"The dataframe for timeframe '{timeframe}' contains duplicate 'start_datetime' indices.")

# Step 1: Initialize 'div_{other_timeframe}' columns as False
for timeframe_key, df in divergence_data.items():
    for compare_key in divergence_data.keys():
        div_col = f"div_{compare_key}"
        if timeframe_key != compare_key:
            if div_col not in df.columns:
                df[div_col] = False
        else:
            if div_col not in df.columns:
                df[div_col] = True

# Step 2: Prepare divergence sets for higher timeframes
# Create a dictionary to hold sets of start_datetime with divergences for each timeframe
divergence_sets = {}
for timeframe, df in divergence_data.items():
    # Extract the set of start_datetime where divergence is True
    divergence_times = set(df.index)
    divergence_sets[timeframe] = divergence_times

# Step 3: Align divergences across timeframes
for t_idx, base_timeframe in enumerate(timeframes):
    if base_timeframe not in divergence_data.keys():
        continue
    base_df = divergence_data[base_timeframe]
    base_div_times = base_df.index

    # Iterate over higher timeframes to align divergences
    for higher_timeframe in timeframes[t_idx+1:]:
        if higher_timeframe not in divergence_data.keys():
            continue
        
        higher_df = divergence_data[higher_timeframe]
        # Get the frequency string for flooring
        freq = timeframe_to_freq[higher_timeframe]

        # Floor the base divergence times to the nearest lower interval of the higher timeframe
        aligned_start_times = base_div_times.floor(freq)

        # Check which aligned_start_times exist in the higher timeframe's divergence set
        mask = aligned_start_times.isin(divergence_sets[higher_timeframe])

        # Debugging: Print lengths
        print(f"Base Timeframe: {base_timeframe}, Higher Timeframe: {higher_timeframe}")
        print(f"Number of base divergences: {len(base_div_times)}")
        print(f"Number of mask values: {len(mask)}")

        # Proceed only if lengths match
        if len(base_div_times) != len(mask):
            raise ValueError(
                f"Length mismatch between base_div_times ({len(base_div_times)}) and mask ({len(mask)}) "
                f"for base timeframe '{base_timeframe}' and higher timeframe '{higher_timeframe}'."
            )

        # Assign the mask to the base DataFrame
        div_higher_col = f"div_{higher_timeframe}"
        base_df[div_higher_col] = mask

        # Assign to the higher DataFrame
        matched_aligned_times = aligned_start_times[mask]
        div_base_col = f"div_{base_timeframe}"
        higher_df.loc[matched_aligned_times, div_base_col] = True



Base Timeframe: 5m, Higher Timeframe: 15m
Number of base divergences: 251
Number of mask values: 251
Base Timeframe: 5m, Higher Timeframe: 1h
Number of base divergences: 251
Number of mask values: 251
Base Timeframe: 5m, Higher Timeframe: 4h
Number of base divergences: 251
Number of mask values: 251
Base Timeframe: 5m, Higher Timeframe: 1d
Number of base divergences: 251
Number of mask values: 251
Base Timeframe: 15m, Higher Timeframe: 1h
Number of base divergences: 85
Number of mask values: 85
Base Timeframe: 15m, Higher Timeframe: 4h
Number of base divergences: 85
Number of mask values: 85
Base Timeframe: 15m, Higher Timeframe: 1d
Number of base divergences: 85
Number of mask values: 85
Base Timeframe: 1h, Higher Timeframe: 4h
Number of base divergences: 28
Number of mask values: 28
Base Timeframe: 1h, Higher Timeframe: 1d
Number of base divergences: 28
Number of mask values: 28
Base Timeframe: 4h, Higher Timeframe: 1d
Number of base divergences: 9
Number of mask values: 9


  aligned_start_times = base_div_times.floor(freq)
  aligned_start_times = base_div_times.floor(freq)
  aligned_start_times = base_div_times.floor(freq)
  aligned_start_times = base_div_times.floor(freq)
  aligned_start_times = base_div_times.floor(freq)
  aligned_start_times = base_div_times.floor(freq)


In [149]:
divergence_data['5m'].sort_index()[112:114]

Unnamed: 0_level_0,end_datetime,entry_datetime,entry_price,previous_peak_datetime,divergence,price_change,rsi_change,TP,SL,label,TP_percent,SL_percent,TP_/_SL,profit,div_5m,div_15m,div_1h,div_4h,div_1d
start_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2024-11-22 13:25:00,2024-11-22 14:55:00,2024-11-22 15:05:00,97487.97,2024-11-22 12:00:00,Bullish Divergence,1102.23,26.32,97819.98,97122.11,True,0.34,0.38,0.91,332.01,True,True,False,False,False
2024-11-22 17:15:00,2024-11-22 19:05:00,2024-11-22 19:15:00,99212.13,2024-11-22 14:55:00,Bearish Divergence,165.32,-0.45,98520.34,99384.62,False,0.7,0.17,4.01,-172.49,True,False,False,False,False


In [143]:
dd_filter['15m'][31:32]

Unnamed: 0_level_0,end_datetime,entry_datetime,entry_price,previous_peak_datetime,divergence,price_change,rsi_change,TP,SL,label,...,SL_percent,position,TP_vs_SL,TP_/_SL,profit,div_5m,div_15m,div_1h,div_4h,div_1d
start_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-11-22 13:15:00,2024-11-22 14:45:00,2024-11-22 15:15:00,97951.99,2024-11-22 07:30:00,Bullish Divergence,933.42,19.24,97999.9,97122.11,True,...,0.85,False,0.057731,0.06,47.91,True,True,False,False,False


In [154]:
divergence_data = pd.read_pickle(f"{PROJECT_PATH}/data/divergence_data.pickle")


for key, value in divergence_data.items():
    print(f"{key} df have uniques of")
    for col in value.columns:
        if 'div_' in col and col != f'div_{key}':
            print(f"{value[col].value_counts()}")
    print('\n\n')


5m df have uniques of
div_15m
False    19024
True      3188
Name: count, dtype: int64
div_1h
False    21487
True       725
Name: count, dtype: int64
div_4h
False    21990
True       222
Name: count, dtype: int64
div_1d
False    21957
True       255
Name: count, dtype: int64



15m df have uniques of
div_5m
False    5467
True     2836
Name: count, dtype: int64
div_1h
False    7357
True      946
Name: count, dtype: int64
div_4h
False    8172
True      131
Name: count, dtype: int64
div_1d
False    8230
True       73
Name: count, dtype: int64



1h df have uniques of
div_5m
False    1983
True      655
Name: count, dtype: int64
div_15m
False    1847
True      791
Name: count, dtype: int64
div_4h
False    2441
True      197
Name: count, dtype: int64
div_1d
False    2611
True       27
Name: count, dtype: int64



4h df have uniques of
div_5m
False    462
True     314
Name: count, dtype: int64
div_15m
False    580
True     196
Name: count, dtype: int64
div_1h
False    627
True     149
Name: co

## Analyze

In [156]:
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd

# 서비스 계정 인증
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
credentials = ServiceAccountCredentials.from_json_keyfile_name(f'{PROJECT_PATH}/calcium-ember-444319-n7-3b60cf57e696.json', scope)
client = gspread.authorize(credentials)
# Google Service Account Credentials JSON 파일 경로
spreadsheet = client.open_by_url("https://docs.google.com/spreadsheets/d/1uJy2-CV63Pywc2GJJGRP6fSHXmPuTTybS8bMIWhH4Qc/edit?gid=0#gid=0")


Uploading to Google Sheets

In [157]:
divergence_df2 = divergence_data.copy()
sheets = {}

for timeframe, ddf in divergence_data.items():
    ddf['TP_percent'] = 100 * (ddf['TP'] - ddf['entry_price']) * np.where(ddf['divergence'] == 'Bullish Divergence', 1, -1) / ddf['entry_price'] 
    ddf['SL_percent'] = 100 * (ddf['entry_price'] - ddf['SL']) * np.where(ddf['divergence'] == 'Bullish Divergence', 1, -1) / ddf['entry_price']
    ddf['TP_/_SL'] = ddf['TP_percent'] / ddf['SL_percent']
    is_bullish = np.where(ddf['divergence'] == 'Bullish Divergence', 1, -1)
    ddf['profit'] = np.where(
        ddf['label'],
        is_bullish * (ddf['TP'] - ddf['entry_price']),
        -is_bullish * (ddf['entry_price'] - ddf['SL'])
    )
    if 'future_return' in ddf.columns:
        ddf = ddf.drop(columns=['future_return'])
    ddf[['price_change', 'rsi_change', 'TP', 'SL', 'TP_percent', 'SL_percent', 'TP_/_SL', 'profit']] = ddf[['price_change', 'rsi_change', 'TP', 'SL', 'TP_percent', 'SL_percent', 'TP_/_SL', 'profit']].round(2)
    ddf2 = ddf.copy()
    ddf2['start_datetime'] = ddf2.index
    # start_datetime 열을 가장 왼쪽으로 이동
    ddf2 = ddf2[['start_datetime'] + [col for col in ddf2.columns if col != 'start_datetime']]

    ddf2 = ddf2.astype(str)

    divergence_df2[timeframe] = ddf
    sheets[timeframe] = ddf2

pd.to_pickle(divergence_df2, f"{PROJECT_PATH}/data/divergence_data3.pickle")

for sheet_name, df in sheets.items():
    try:
        worksheet = spreadsheet.worksheet(sheet_name)
    except gspread.exceptions.WorksheetNotFound:
        worksheet = spreadsheet.add_worksheet(title=sheet_name, rows="100", cols="20")
    worksheet.clear()  # 기존 데이터 삭제
    worksheet.update([df.columns.values.tolist()] + df.values.tolist())
    
# TODO: in first sheet, add total profits

  worksheet.update([df.columns.values.tolist()] + df.values.tolist())


Analyzing True / False counts and profits

In [163]:
from collections import Counter

for timeframe in ['5m', '15m', '1h', '4h', '1d']:
    print(f"==========Analyzing {timeframe} profit and counts of position==========")
    ddf = divergence_data[timeframe]
    c = Counter(ddf.label)
    profit = 0
    for _, row in ddf.iterrows():
        is_bullish = 1 if row.divergence == "Bullish Divergence" else -1
        if row.label:
            profit += is_bullish * (row.TP - row.entry_price)
        else:
            profit -= is_bullish * (row.entry_price - row.SL)
    
    print(f"{timeframe} Total True / False count = {c}")
    print(f"{timeframe} Total profit = {profit}")
    print('\n')
    
    ddf_11 = ddf.loc[ddf.index >= '2024-11-01']
    c_11 = Counter(ddf_11.label)
    profit_11 = 0
    for _, row in ddf_11.iterrows():
        is_bullish = 1 if row.divergence == "Bullish Divergence" else -1
        if row.label:
            profit_11 += is_bullish * (row.TP - row.entry_price)
        else:
            profit_11 -= is_bullish * (row.entry_price - row.SL)
    print(f"{timeframe} after 24.11 True / False count = {c_11}")
    print(f"{timeframe} after 24.11 profit = {profit_11}")
    print('\n\n')



5m Total True / False count = Counter({False: 15988, True: 6224})
5m Total profit = 76016.79999999994


5m after 24.11 True / False count = Counter({False: 256, True: 95})
5m after 24.11 profit = 10430.9699999999



15m Total True / False count = Counter({False: 6208, True: 2095})
15m Total profit = 49195.749999999636


15m after 24.11 True / False count = Counter({False: 92, True: 41})
15m after 24.11 profit = 14577.050000000047



1h Total True / False count = Counter({False: 2035, True: 601, nan: 1, nan: 1})
1h Total profit = -10464.66000000017


1h after 24.11 True / False count = Counter({False: 32, True: 7})
1h after 24.11 profit = -15634.040000000052



4h Total True / False count = Counter({False: 638, True: 138})
4h Total profit = -51114.69999999999


4h after 24.11 True / False count = Counter({False: 8, True: 2})
4h after 24.11 profit = -10419.919999999998



1d Total True / False count = Counter({False: 118, True: 25})
1d Total profit = -37615.26000000002


1d after 24.11 T

When trading if divergence occur in at least 2 timeframe

In [160]:
divergence_data['5m'].label.value_counts().to_dict()

{False: 15988, True: 6224}

In [162]:
for timeframe in ['5m', '15m', '1h', '4h', '1d']:
    print(f"==========Analyzing {timeframe} profit and counts of position==========")
    ddf = divergence_data[timeframe]
    div_columns = [col for col in df.columns if col.startswith('div_')]
    ddf = ddf[ddf[div_columns].sum(axis=1) >= 2]

    c = ddf.label.value_counts().to_dict()
    profit = 0
    for _, row in ddf.iterrows():
        is_bullish = 1 if row.divergence == "Bullish Divergence" else -1
        if row.label:
            profit += is_bullish * (row.TP - row.entry_price)
        else:
            profit -= is_bullish * (row.entry_price - row.SL)
    
    print(f"{timeframe} Total True / False count = {c}")
    print(f"{timeframe} Total profit = {profit}")
    print('\n')
    
    ddf_11 = ddf.loc[ddf.index >= '2024-11-01']
    c_11 = ddf_11.label.value_counts().to_dict()
    profit_11 = 0
    for _, row in ddf_11.iterrows():
        is_bullish = 1 if row.divergence == "Bullish Divergence" else -1
        if row.label:
            profit_11 += is_bullish * (row.TP - row.entry_price)
        else:
            profit_11 -= is_bullish * (row.entry_price - row.SL)
    print(f"{timeframe} after 24.11 True / False count = {c_11}")
    print(f"{timeframe} after 24.11 profit = {profit_11}")
    print('\n\n')



5m Total True / False count = {False: 3166, True: 908}
5m Total profit = 7948.3699999998025


5m after 24.11 True / False count = {False: 75, True: 7}
5m after 24.11 profit = -5970.460000000065



15m Total True / False count = {False: 2788, True: 860}
15m Total profit = 10520.549999999876


15m after 24.11 True / False count = {False: 28, True: 10}
15m after 24.11 profit = 6820.439999999988



1h Total True / False count = {False: 938, True: 278}
1h Total profit = 3912.389999999894


1h after 24.11 True / False count = {False: 10, True: 3}
1h after 24.11 profit = -5055.180000000037



4h Total True / False count = {False: 382, True: 83}
4h Total profit = -44865.230000000025


4h after 24.11 True / False count = {False: 8}
4h after 24.11 profit = -13636.649999999994



1d Total True / False count = {False: 118, True: 25}
1d Total profit = -37615.26000000002


1d after 24.11 True / False count = {False: 4}
1d after 24.11 profit = -8177.369999999995





## Test