In [1]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import mplfinance as mpf
import Data_for_bp as dfb
import pickle

In [2]:
def ladderize_open(tick_data, grid_size):
    """
    Convert tick data into step-based data using a specified grid size.

    :param tick_data: A pandas Series of tick data.
    :param grid_size: The size of the grid to discretize the tick data.
    :return: A pandas Series of ladderized data.
    """
    ladderized_data = [tick_data.iloc[0]]
    for i in range(1, len(tick_data)):
        if tick_data.iloc[i] > ladderized_data[-1] + grid_size:
            ladderized_data.append(ladderized_data[-1] + grid_size)
        elif tick_data.iloc[i] < ladderized_data[-1] - grid_size:
            ladderized_data.append(ladderized_data[-1] - grid_size)
        else:
            ladderized_data.append(ladderized_data[-1])
    # Adding the final close price
    ladderized_data[-1]=tick_data.iloc[-1]
    return pd.Series(ladderized_data, index=tick_data.index)
def ladderize_absolute(tick_data, grid_size):
    """
    Convert tick data into step-based data using a specified grid size.

    :param tick_data: A pandas Series of tick data.
    :param grid_size: The size of the grid to discretize the tick data.
    :return: A pandas Series of ladderized data.
    """
    # Initialize ladder at the nearest rounded price level based on grid size
    ladderized_data = [(tick_data.iloc[0] / grid_size).round() * grid_size]
    for i in range(1, len(tick_data)):
        current_tick = tick_data.iloc[i]
        last_ladder_level = ladderized_data[-1]
        if current_tick > last_ladder_level + grid_size:
            ladderized_data.append(last_ladder_level + grid_size)
        elif current_tick < last_ladder_level - grid_size:
            ladderized_data.append(last_ladder_level - grid_size)
        else:
            ladderized_data.append(last_ladder_level)
    # Adding the final close price
    ladderized_data[-1] = tick_data.iloc[-1]
    return pd.Series(ladderized_data, index=tick_data.index)

In [3]:
def count_ticks_by_date(tick_data):
    # Group the data by date
    grouped = tick_data.groupby(tick_data.index.date)
    
    # Initialize an empty array to hold the count of ticks for each date
    D = len(grouped)
    T = np.zeros(D)
    
    # Initialize an index variable for populating the T array
    T_index = 0
    
    # Loop through each group and count the number of ticks
    for date, group in grouped:
        T[T_index] = len(group)
        print(f"Date: {date}, Number of ticks: {len(group)}")
        T_index += 1
    
    # Convert the array to integers
    T = T.astype(int)
    return T,D

In [4]:
def B_value(tick_data, grid_size, ladderized_function):
    '''
    Convert tick data to binomial series and also return the original tick data values
    corresponding to the indices where the binomial series exists.
    '''
    ladderized_data = ladderized_function(tick_data, grid_size)
    jumps = dfb.filter_jumps(ladderized_data)
    aggregated_diff = dfb.aggregate_differences(jumps)
    binomial_data = aggregated_diff.diff()
    
    # Drop NaNs from binomial_data and get the corresponding original tick_data values
    binomial_data.dropna(inplace=True)
    original_values_at_binomial_index = tick_data[binomial_data.index]
    
    return binomial_data.to_list(), original_values_at_binomial_index.tolist()


In [10]:
def compute_pnl_t(G,T,n,tick_data,d,position):
    #initialize pnl matrix for day d, T[d] period
    B, ladder_data = B_value(tick_data, G, ladderize_open)
    pnl = np.zeros(len(B))
    for t in range (len(B)-1):
        if B[t] > 0: ################ how to change t when day change>> Can i use if else for T period?
            position-=n
        elif B[t] < 0: ################ lets try one day first this can neglect for now
            position+=n
        else:
            position+=0
        pnl[t+1] = position * G * B[t] + pnl[t]
    pnl_T= pnl[-1] + position*(ladder_data[len(B)-1]-G)
    return pnl,pnl_T,position 

In [6]:
def net_profit(G,T,n,tick_data):
    profit = np.zeros(D)
    for d in range (D):
        if d == 0:
            position = 0
            pnl, pnl_T, position = compute_pnl_t(G, T, n, tick_data,d,position)
            profit[d] = pnl_T
        else:
            pnl, pnl_T, position = compute_pnl_t(G, T, n, tick_data,d,position)
            profit[d] = pnl_T + profit[d-1]
    net_profit = profit[-1] 
    return -net_profit

In [7]:
def position_constraint(params, T, n, tick_data,d,position):
    G = params[0]
    _, _, position = compute_pnl_t(G, T, n, tick_data,d,position)  
    return min(10000000 - abs(position), abs(position) + 10000000)

def pnl_constraint(params, T, n, tick_data,d,position):
    G = params[0]
    pnl, _, _= compute_pnl_t(G, T, n, tick_data,d,position)  
    return min(pnl) + 150000

def profit_constraint(params, T, n, tick_data):
    G = params[0]
    net_profit_value = net_profit(G, T, n, tick_data)
    return net_profit_value + 500000


In [8]:
#fix n constant for now
n = 1000 #fix n
# lets try data of one day
tick_data = dfb.get_tick_data('15 Aug 2023','16 Aug 2023')['EURUSD.mid']
T,D =count_ticks_by_date(tick_data)

2023-09-16 18:39:37,917 - findatapy.market.datavendorweb - INFO - Request Dukascopy data
2023-09-16 18:39:37,918 - findatapy.market.datavendorweb - INFO - About to download from Dukascopy... for EURUSD
2023-09-16 18:39:37,990 - findatapy.market.datavendorweb - INFO - Downloading... 2023-08-15 00:00:00 https://www.dukascopy.com/datafeed/EURUSD/2023/07/15/00h_ticks.bi5
2023-09-16 18:39:43,820 - findatapy.market.datavendorweb - INFO - Completed request from Dukascopy
Date: 2023-08-15, Number of ticks: 108011


BFGS

In [11]:
#optimixation
constraints = [{'type': 'ineq', 'fun': lambda params: position_constraint(params, T, n, tick_data,d=0,position=0)},
               {'type': 'ineq', 'fun': lambda params: pnl_constraint(params, T, n, tick_data,d=0,position=0)},
               {'type': 'ineq', 'fun': lambda params: profit_constraint(params, T, n, tick_data)}]


# Initial guess for G
initial_G = [0.001]

result = minimize(lambda x: -net_profit(x[0], T, n, tick_data),
                  initial_G,
                  constraints=constraints,
                  bounds=[(0, None)])


# The optimal G should now be stored in result.x
optimal_G = result.x[0]
print(optimal_G)

0.011296841591164532


In [12]:
optimal_net_profit = net_profit(optimal_G, T, n, tick_data)
print(f"Optimal net profit: {optimal_net_profit}")

Optimal net profit: 1090.3899669647217


Grid Search

In [16]:
#Grid search
best_profit = float('-inf')
best_G = None

# Custom list of values for G
G_values = [0.0001, 0.001, 0.01, 0.1, 1]

# Grid Search
for G in G_values:
    current_profit = -net_profit(G, T, n, tick_data)
    if current_profit > best_profit:
        best_profit = current_profit
        best_G = G

print(f"Optimal G: {best_G}")
print(f"Optimal net profit: {best_profit}")


Optimal G: 0.01
Optimal net profit: -1090.3899669647217


Nelder-Mead

In [17]:
#Nelder-Mead
from scipy.optimize import minimize

# Your net_profit function and constraints should already be defined

# Initial guess for G
initial_G = [0.001]

# No bounds or constraints are used in Nelder-Mead
result = minimize(lambda x: -net_profit(x[0], T, n, tick_data),
                  initial_G,
                  method='Nelder-Mead',
                  options={'xatol': 1e-8, 'disp': True})

# The optimal G should now be stored in result.x
optimal_G = result.x[0]

print(f"Optimal G found by Nelder-Mead: {optimal_G}")


Optimization terminated successfully.
         Current function value: -1102.899733
         Iterations: 18
         Function evaluations: 36
Optimal G found by Nelder-Mead: 0.0010424804687500008


In [18]:
# Calculate the optimal net profit
optimal_net_profit = net_profit(optimal_G, T, n, tick_data)
print(f"Optimal net profit: {optimal_net_profit}")

Optimal net profit: 1102.8997325897217


Optuna

In [19]:
pip install optuna


Note: you may need to restart the kernel to use updated packages.


In [20]:
import optuna

# Define the objective function that Optuna will optimize
def objective(trial):
    G = trial.suggest_float('G', 0.0001, 1, log=True)  # You can specify the range and distribution for G
    return -net_profit(G, T, n, tick_data)

# Create a study object and specify the direction is to maximize.
study = optuna.create_study(direction='maximize')

# Optimize the study, the objective function is passed in as the first argument.
study.optimize(objective, n_trials=100)  # n_trials specifies the number of iterations

# Results
print(f"The best parameter setting is: {study.best_params}")
print(f"The best net profit is: {-study.best_value}")  # The net_profit was negated in the objective function

# To retrieve the best parameter
best_G = study.best_params['G']

# Calculate the optimal net profit
optimal_net_profit = net_profit(best_G, T, n, tick_data)
print(f"Optimal net profit: {optimal_net_profit}")


[I 2023-09-16 18:58:58,505] A new study created in memory with name: no-name-80155489-c9b6-4525-8f5d-b088c32d3d4a
[I 2023-09-16 18:59:01,248] Trial 0 finished with value: -2198.375096812623 and parameters: {'G': 0.0006283986743992641}. Best is trial 0 with value: -2198.375096812623.
[I 2023-09-16 18:59:03,929] Trial 1 finished with value: -2207.7290786189246 and parameters: {'G': 0.00044178925720461356}. Best is trial 0 with value: -2198.375096812623.
[I 2023-09-16 18:59:07,723] Trial 2 finished with value: -1090.389966964722 and parameters: {'G': 0.05086996690964737}. Best is trial 2 with value: -1090.389966964722.
[I 2023-09-16 18:59:10,558] Trial 3 finished with value: -2219.5816975795024 and parameters: {'G': 0.00028322455219021196}. Best is trial 2 with value: -1090.389966964722.
[I 2023-09-16 18:59:13,427] Trial 4 finished with value: -1102.707230613143 and parameters: {'G': 0.0010264386373684466}. Best is trial 2 with value: -1090.389966964722.
[I 2023-09-16 18:59:16,145] Trial 

The best parameter setting is: {'G': 0.028334074970013723}
The best net profit is: 1090.3899669647215
Optimal net profit: 1090.3899669647215
