# Исследование по GMTS

In [110]:
import pandas as pd
import numpy as np

In [111]:
# Загрузка 5 минутных данных
df = pd.read_csv('df_m5_TVI_CCI_T3_GHL.csv')
df['tradedate'] = pd.to_datetime(df['tradedate'])

df

Unnamed: 0,tradedate,open,high,low,close,avg_per,tvi,cci,t3,ghl
0,2015-01-05 10:00:00,78450.0,78450.0,73200.0,73930.0,100.00,0,0,0,0
1,2015-01-05 10:05:00,73890.0,74700.0,73660.0,74700.0,100.00,1,0,1,0
2,2015-01-05 10:10:00,74700.0,74720.0,73830.0,73950.0,100.00,1,0,1,0
3,2015-01-05 10:15:00,73970.0,74440.0,73620.0,74430.0,100.00,1,0,1,0
4,2015-01-05 10:20:00,74400.0,74720.0,74220.0,74410.0,100.00,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...
410502,2024-12-17 23:25:00,72680.0,72720.0,72680.0,72690.0,43.72,1,1,1,1
410503,2024-12-17 23:30:00,72680.0,72690.0,72670.0,72690.0,43.74,1,1,1,1
410504,2024-12-17 23:35:00,72700.0,72790.0,72690.0,72790.0,43.86,1,1,1,1
410505,2024-12-17 23:40:00,72800.0,72820.0,72760.0,72810.0,44.03,1,1,1,1


Создание дата фрейма с колонкой признаков открытия позиции Bay или Sell.

In [112]:
# Проверка, являются ли все значения в строке одинаковыми [1, 1, 1, 1] или [-1, -1, -1, -1]
df['IsAllSame'] = df[['tvi', 'cci', 't3', 'ghl']].eq(1).all(axis=1) | \
    df[['tvi', 'cci', 't3', 'ghl']].eq(-1).all(axis=1)

# Находим группы последовательных строк, где IsAllSame == True
df['Group'] = (df['IsAllSame'] != df['IsAllSame'].shift(1)).cumsum()

# Отмечаем только вторую строку в каждой группе, где IsAllSame == True
df['SecondInRow'] = df.groupby('Group').cumcount() == 1
df.loc[~df['IsAllSame'], 'SecondInRow'] = False  # Убираем строки, где IsAllSame == False

# Добавляем колонку 'sig', отмечаем только вторую строку в последовательности
df['sig'] = 0
df.loc[df['SecondInRow'], 'sig'] = 1

# Убираем вспомогательную колонку 'Group', 'IsAllSame', 'SecondInRow'
df.drop(columns=['Group', 'IsAllSame', 'SecondInRow'], inplace=True)

# Добавляем колонку с ценой открытия по сигналу
df['sig_open'] = df.apply(lambda x: x.open if x.sig == 1 else 0, axis=1)

# Добавляем колонку с направлением торговли
conditions = [
    (df['tvi'] == 1) & (df['cci'] == 1) & (df['t3'] == 1) & (df['ghl'] == 1) & (df['sig'] == 1),
    (df['tvi'] == -1) & (df['cci'] == -1) & (df['t3'] == -1) & (df['ghl'] == -1) & (df['sig'] == 1)
]
choices = [1, -1]
df['buy_sell'] = np.select(conditions, choices, default=0)

df

Unnamed: 0,tradedate,open,high,low,close,avg_per,tvi,cci,t3,ghl,sig,sig_open,buy_sell
0,2015-01-05 10:00:00,78450.0,78450.0,73200.0,73930.0,100.00,0,0,0,0,0,0.0,0
1,2015-01-05 10:05:00,73890.0,74700.0,73660.0,74700.0,100.00,1,0,1,0,0,0.0,0
2,2015-01-05 10:10:00,74700.0,74720.0,73830.0,73950.0,100.00,1,0,1,0,0,0.0,0
3,2015-01-05 10:15:00,73970.0,74440.0,73620.0,74430.0,100.00,1,0,1,0,0,0.0,0
4,2015-01-05 10:20:00,74400.0,74720.0,74220.0,74410.0,100.00,1,0,1,0,0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
410502,2024-12-17 23:25:00,72680.0,72720.0,72680.0,72690.0,43.72,1,1,1,1,0,0.0,0
410503,2024-12-17 23:30:00,72680.0,72690.0,72670.0,72690.0,43.74,1,1,1,1,0,0.0,0
410504,2024-12-17 23:35:00,72700.0,72790.0,72690.0,72790.0,43.86,1,1,1,1,0,0.0,0
410505,2024-12-17 23:40:00,72800.0,72820.0,72760.0,72810.0,44.03,1,1,1,1,0,0.0,0


Создаем колонку "profit" в которую записываем:  
если значение в колонке 'buy_sell' равно 1, то из ближайшего последующего значения в колонке 'sig_open' не равного 0, вычесть текущее значение в колонке 'sig_open'  
если значение в колонке 'buy_sell' равно -1, то из текущего значения в колонке 'sig_open', вычесть ближайшее последующее значение в колонке 'sig_open' не равное 0  
иначе, записать в колонку 0

In [113]:
def calculate_profit(row, sig_open_col, buy_sell_col):
    # Индекс текущей строки
    current_index = row.name

    # Значение buy_sell для текущей строки
    buy_sell = row[buy_sell_col]
    sig_open = row[sig_open_col]

    # Если buy_sell равно 1
    if buy_sell == 1:
        # Найти ближайшее последующее значение в sig_open, не равное 0
        for next_value in df.loc[current_index + 1:, sig_open_col]:
            if next_value != 0:
                return next_value - sig_open

    # Если buy_sell равно -1
    elif buy_sell == -1:
        # Найти ближайшее последующее значение в sig_open, не равное 0
        for next_value in df.loc[current_index + 1:, sig_open_col]:
            if next_value != 0:
                return sig_open - next_value

    # Если buy_sell не равно 1 или -1
    return 0

# Применить функцию ко всем строкам
df['profit'] = df.apply(calculate_profit, axis=1, sig_open_col='sig_open', buy_sell_col='buy_sell')

# Результат
df

Unnamed: 0,tradedate,open,high,low,close,avg_per,tvi,cci,t3,ghl,sig,sig_open,buy_sell,profit
0,2015-01-05 10:00:00,78450.0,78450.0,73200.0,73930.0,100.00,0,0,0,0,0,0.0,0,0.0
1,2015-01-05 10:05:00,73890.0,74700.0,73660.0,74700.0,100.00,1,0,1,0,0,0.0,0,0.0
2,2015-01-05 10:10:00,74700.0,74720.0,73830.0,73950.0,100.00,1,0,1,0,0,0.0,0,0.0
3,2015-01-05 10:15:00,73970.0,74440.0,73620.0,74430.0,100.00,1,0,1,0,0,0.0,0,0.0
4,2015-01-05 10:20:00,74400.0,74720.0,74220.0,74410.0,100.00,1,0,1,0,0,0.0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
410502,2024-12-17 23:25:00,72680.0,72720.0,72680.0,72690.0,43.72,1,1,1,1,0,0.0,0,0.0
410503,2024-12-17 23:30:00,72680.0,72690.0,72670.0,72690.0,43.74,1,1,1,1,0,0.0,0,0.0
410504,2024-12-17 23:35:00,72700.0,72790.0,72690.0,72790.0,43.86,1,1,1,1,0,0.0,0,0.0
410505,2024-12-17 23:40:00,72800.0,72820.0,72760.0,72810.0,44.03,1,1,1,1,0,0.0,0,0.0


In [114]:
df['dif_per'] = df['avg_per'].diff().shift(1)
df.dropna(inplace=True)
df

Unnamed: 0,tradedate,open,high,low,close,avg_per,tvi,cci,t3,ghl,sig,sig_open,buy_sell,profit,dif_per
2,2015-01-05 10:10:00,74700.0,74720.0,73830.0,73950.0,100.00,1,0,1,0,0,0.0,0,0.0,0.00
3,2015-01-05 10:15:00,73970.0,74440.0,73620.0,74430.0,100.00,1,0,1,0,0,0.0,0,0.0,0.00
4,2015-01-05 10:20:00,74400.0,74720.0,74220.0,74410.0,100.00,1,0,1,0,0,0.0,0,0.0,0.00
5,2015-01-05 10:25:00,74420.0,74420.0,73600.0,73610.0,100.00,1,0,-1,-1,0,0.0,0,0.0,0.00
6,2015-01-05 10:30:00,73630.0,73910.0,73320.0,73490.0,100.00,1,0,-1,-1,0,0.0,0,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
410502,2024-12-17 23:25:00,72680.0,72720.0,72680.0,72690.0,43.72,1,1,1,1,0,0.0,0,0.0,-0.09
410503,2024-12-17 23:30:00,72680.0,72690.0,72670.0,72690.0,43.74,1,1,1,1,0,0.0,0,0.0,0.14
410504,2024-12-17 23:35:00,72700.0,72790.0,72690.0,72790.0,43.86,1,1,1,1,0,0.0,0,0.0,0.02
410505,2024-12-17 23:40:00,72800.0,72820.0,72760.0,72810.0,44.03,1,1,1,1,0,0.0,0,0.0,0.12


In [115]:
# Очистка df
df = df[df["profit"] != 0.0]

# Фильтрация строк с марта 2015 и времени до 19:00:00
df = df[
    (df["tradedate"] >= pd.Timestamp("2015-03-01")) & 
    (df["tradedate"].dt.time < pd.to_datetime("19:00:00").time())
]

df = df.copy().reset_index(drop=True)

# Добавление колонки с временем
df['time'] = df['tradedate'].dt.time

df

Unnamed: 0,tradedate,open,high,low,close,avg_per,tvi,cci,t3,ghl,sig,sig_open,buy_sell,profit,dif_per,time
0,2015-03-02 10:05:00,89890.0,90030.0,89550.0,89760.0,39.51,-1,-1,-1,-1,1,89890.0,-1,-170.0,1.06,10:05:00
1,2015-03-02 11:05:00,90060.0,90220.0,89980.0,90060.0,44.70,1,1,1,1,1,90060.0,1,510.0,2.54,11:05:00
2,2015-03-02 12:00:00,90570.0,90580.0,90270.0,90350.0,37.23,1,1,1,1,1,90570.0,1,-450.0,0.43,12:00:00
3,2015-03-02 13:45:00,90120.0,90200.0,90070.0,90140.0,30.32,-1,-1,-1,-1,1,90120.0,-1,220.0,-0.38,13:45:00
4,2015-03-02 14:30:00,89900.0,89930.0,89530.0,89670.0,30.73,-1,-1,-1,-1,1,89900.0,-1,50.0,0.61,14:30:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23456,2024-12-17 13:20:00,73760.0,73820.0,73730.0,73790.0,37.58,-1,-1,-1,-1,1,73760.0,-1,220.0,-0.24,13:20:00
23457,2024-12-17 14:20:00,73540.0,73540.0,73330.0,73340.0,37.67,-1,-1,-1,-1,1,73540.0,-1,290.0,0.30,14:20:00
23458,2024-12-17 15:45:00,73250.0,73260.0,73150.0,73220.0,42.49,1,1,1,1,1,73250.0,1,-270.0,-0.13,15:45:00
23459,2024-12-17 16:15:00,72980.0,72990.0,72720.0,72770.0,41.44,-1,-1,-1,-1,1,72980.0,-1,370.0,-0.24,16:15:00


In [116]:
df = (
    df.groupby(['time'], as_index=False)
    .agg(
        total_profit=('profit', 'sum'),  # Сумма значений profit
        count_profit=('profit', 'count')  # Количество значений profit
    )
    .sort_values(['total_profit'], ascending=False)
)

df['profit_avg'] = df['total_profit'] / df['count_profit']

df['profit_avg'] = df['profit_avg'].round(2)

print(df.sort_values (by='profit_avg', ascending=False).head(20))

         time  total_profit  count_profit  profit_avg
0    07:00:00       49640.0            52      954.62
36   10:00:00      143030.0           279      512.65
41   10:25:00       29860.0           134      222.84
24   09:00:00       18050.0            86      209.88
126  17:30:00       40380.0           210      192.29
21   08:45:00        3030.0            20      151.50
62   12:10:00       32880.0           221      148.78
64   12:20:00       28520.0           198      144.04
38   10:10:00       36800.0           256      143.75
15   08:15:00        1420.0            10      142.00
123  17:15:00       27590.0           214      128.93
54   11:30:00       22100.0           182      121.43
132  18:00:00       23860.0           197      121.12
131  17:55:00       25960.0           219      118.54
111  16:15:00       24950.0           227      109.91
121  17:05:00       24530.0           235      104.38
6    07:30:00         520.0             5      104.00
74   13:10:00       24550.0 

In [117]:
# # Сохранение в файл
# df.sort_values (by='time', ascending=True).to_excel('исследования.xlsx')