In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import sklearn as skl
import matplotlib.pyplot as plt
from scipy import stats
from scipy.stats import norm
import os
import datetime
import random
import scipy.integrate as integrate
import secrets
pd.options.mode.chained_assignment = None  # default='warn'

In [3]:
# Generates a list of floating-point numbers within a specified range
def float_range(start, stop, step=1.0):
    current = start
    array = []
    
    # Loop until 'current' reaches or exceeds 'stop'
    while current < stop:
        array.append(current)
        current += step

    return array

# Calculates the confidence interval of a dataset
def confidenceInterval(data, confidence=99):
    # Calculate sample statistics
    sample_mean = np.mean(data)
    sample_std_dev = np.std(data, ddof=1)  # Sample standard deviation (ddof=1)
    sample_size = len(data)
    standard_error = sample_std_dev / np.sqrt(sample_size)
    
    # Compute the confidence interval using the t-distribution
    confidence_interval = stats.t.interval(confidence / 100, df=sample_size - 1, loc=sample_mean, scale=standard_error)
    
    return confidence_interval

# Performs bootstrap resampling on a dataset
def bootstrap(data, numSim):
    bootstrapSamples = []
    
    # Repeat bootstrap resampling 'numSim' times
    for _ in range(numSim):
        # Randomly sample data with replacement
        bootstrapSample = np.random.choice(data, size=len(data), replace=True)
        bootstrapSamples.append(bootstrapSample)
    
    return bootstrapSamples


In [4]:
df = pd.read_csv("dataset.csv");
df['price'] = (df['pnl'] - df['fee']) / (df['volume'] * df['return'])

In [5]:
# Get unique trader IDs from the DataFrame
traders = df['trader'].unique()

# Initialize a list to store profitable traders
profitableTraders = []

# Identify profitable traders based on the sum of their returns
for trader in traders:
    if df[df['trader'] == trader]['return'].sum() > 0:
        profitableTraders.append(trader)

# Create a directory for storing plots if it doesn't exist
if not os.path.exists('./Plots') and not os.path.isdir('./Plots'):
    os.makedirs('./Plots/')

# Check if the 'Plots' directory is empty
if len(os.listdir('./Plots/')) == 0:
    # Iterate through profitable traders
    for trader in profitableTraders:
        # Extract return data for the current trader
        returnData = np.array(df[df['trader'] == trader]['return'].values.tolist())
        
        # Create a histogram plot using Seaborn
        ax = sns.distplot(returnData, bins=50, kde=True, color='red', hist_kws={"linewidth": 15, 'alpha': 1})
        
        # Set labels for the plot
        ax.set(xlabel='Normal Distribution', ylabel='Frequency')
        
        # Save the plot as an image file in the 'Plots' directory with the trader's name
        plt.savefig("./Plots/" + trader + ".png")


In [6]:
def simulate_trading_strategy(returns_data, initial_capital):
    # Calculate the daily returns
    daily_returns = np.log(1 + returns_data)
    print(daily_returns);

    # Calculate the cumulative returns
    cumulative_returns = np.cumsum(daily_returns, axis=0)

    # Calculate the average daily return and volatility
    avg_daily_return = np.mean(daily_returns, axis=0)
    volatility = np.std(daily_returns, axis=0)

    # Generate random simulations
    simulation_results = []
    for _ in range(1000):
        random_daily_returns = np.random.normal(avg_daily_return, volatility)
        simulation_cumulative_returns = np.cumsum(random_daily_returns) + np.log(initial_capital)
        simulation_results.append(simulation_cumulative_returns)

    return simulation_results

# Example usage
returns_data = np.array([[0.01, 0.02, 0.01], [0.02, 0.01, 0.02], [0.01, 0.01, 0.02]])  # Replace with your actual returns data
initial_capital = 1000000  # Replace with your desired initial capital

simulation_results = simulate_trading_strategy(returns_data, initial_capital)

[[0.00995033 0.01980263 0.00995033]
 [0.01980263 0.00995033 0.01980263]
 [0.00995033 0.00995033 0.01980263]]


In [7]:
def traderPeriod(returns_data, trader, wholeRows=False, drawGraph=False, confidence=-1):
    # Filter data for the specified trader
    data = returns_data[returns_data['trader'] == trader]

    # Initialize variables
    minMonth = maxMonth = minWeek = maxWeek = 0

    # Extract date information from the 'time' column and format it
    data['time'] = data['time'].apply(lambda x: str(x)[0:10])

    # Get unique time scales (dates)
    timescales = data['time'].unique()

    # Define weekly and monthly periods based on unique time scales
    weeklyperiods = []
    monthlyperiods = []

    for timescale in timescales:
        given_date = datetime.datetime.strptime(timescale, "%Y-%m-%d").date()
        one_week_from_given_date = given_date + datetime.timedelta(days=7)
        one_month_from_given_date = given_date + datetime.timedelta(days=30)
        weeklyperiods.append((given_date.strftime("%Y-%m-%d"), one_week_from_given_date.strftime("%Y-%m-%d")))
        monthlyperiods.append((given_date.strftime("%Y-%m-%d"), one_month_from_given_date.strftime("%Y-%m-%d")))

    # Initialize lists to store weekly and monthly return values
    weeklyVals = []
    monthlyVals = []

    # Calculate weekly and monthly returns
    for period in weeklyperiods:
        filteredData = data[(data['time'] >= period[0]) & (data['time'] <= period[1])]
        weeklyVals.append(filteredData['return'].sum())

    for period in monthlyperiods:
        filteredData = data[(data['time'] >= period[0]) & (data['time'] <= period[1])]
        monthlyVals.append(filteredData['return'].sum())

    # Optionally, draw a histogram of weekly returns
    if drawGraph: 
        plt.hist(weeklyVals, bins=float_range(min(weeklyVals), max(weeklyVals) + 0.2, 0.2), rwidth=0.8, color='skyblue', edgecolor='black')
        plt.xlabel('Return value')
        plt.title('Weekly Example')
        plt.show()

    # Optionally, calculate confidence intervals for monthly and weekly returns
    if confidence > 0:
        confidenceIntervalMonthly = confidenceInterval(monthlyVals)
        confidenceIntervalWeekly = confidenceInterval(weeklyVals)
        
        if wholeRows:
            return monthlyVals, weeklyVals, confidenceIntervalMonthly, confidenceIntervalWeekly

        return min(monthlyVals), max(monthlyVals), min(weeklyVals), max(weeklyVals), confidenceIntervalMonthly, confidenceIntervalWeekly

    # Optionally, return full rows of data
    if wholeRows:
        return monthlyVals, weeklyVals

    # Return min and max values for monthly and weekly returns
    return min(monthlyVals), max(monthlyVals), min(weeklyVals), max(weeklyVals)


In [8]:
def tradersPeriod(returns_data, traderList):
    # Initialize lists to store min and max values for weekly and monthly returns
    weeklyMins = []
    weeklyMaxs = []
    monthlyMins = []
    monthlyMaxs = []

    # Iterate through the list of traders
    for trader in traderList:
        # Call the traderPeriod function to get min and max values for returns
        # a: min monthly return, b: max monthly return, c: min weekly return, d: max weekly return
        a, b, c, d = traderPeriod(returns_data, trader)

        # Append min and max values to respective lists
        monthlyMins.append(a)
        monthlyMaxs.append(b)
        weeklyMins.append(c)
        weeklyMaxs.append(d)

    # Return a tuple containing the overall min and max values for weekly and monthly returns
    # Format: ((min weekly min, max weekly max), (min monthly min, max monthly max))
    return (min(weeklyMins), max(weeklyMaxs)), (min(monthlyMins), max(monthlyMaxs))


In [9]:
def findClosestTrader(data, traderToEmulate, userVolume, returnRange=False):
    # Calculate the normalized volume range for all traders
    volumeRangeAll = list(map(lambda x: x / max(df['volume'].values.tolist()), data['volume'].values.tolist()))

    # Get the unique volume range for the trader to emulate
    volumeRangeTrader = data[data['trader'] == traderToEmulate]['volume'].unique()

    # Calculate the closest normalized volume for the user's specified volume
    closestVolumeAll = min(volumeRangeAll, key=lambda x: abs(x - (userVolume / max(data['volume'].values.tolist()))))

    # Calculate the exact volume for the trader to emulate based on the closest normalized volume
    exactVolumeTrader = closestVolumeAll * max(data[data['trader'] == traderToEmulate]['volume'].values.tolist())

    # If returnRange is True, return a sorted list of closest volumes for the trader
    if returnRange:
        return sorted(volumeRangeTrader, key=lambda x: abs(x - userVolume))[:25]

    # Return the closest volume for the trader to emulate
    return min(volumeRangeTrader, key=lambda x: abs(x - exactVolumeTrader))


def findRandomTrader(data, traderToEmulate, returnRange=False):
    # Get the volume range for the trader to emulate
    volumeRangeTrader = data[data['trader'] == traderToEmulate]['volume'].values.tolist()

    # If returnRange is True, return a random sample of volumes from the trader's range
    if returnRange:
        return random.sample(volumeRangeTrader, 25)
    else:
        # Otherwise, return a single random volume from the trader's range
        return min(random.sample(volumeRangeTrader, 25))

In [10]:
def expectedReturnByTrader(data, traderToEmulate, userVolume, returnRange=False, random=False, sortedRange=False):
    # Calculate monthly and weekly returns for the trader
    monthlyReturns, weeklyReturns = traderPeriod(data, traderToEmulate, True)

    # Determine whether to find a random trader volume or the closest volume
    if random:
        closestVolume = findRandomTrader(data, traderToEmulate, returnRange)
    else:
        closestVolume = findClosestTrader(data, traderToEmulate, userVolume, returnRange)

    # If returnRange is True, calculate returns and PnL for each volume in the closestVolume list
    if returnRange:
        returns = {}
        pnls = {}
        for volume in closestVolume:
            # Filter the data for the specified trader and volume
            traderRow = data[(data['volume'] == volume) & (data['trader'] == traderToEmulate)]
            fee = traderRow['fee'].iloc[0]
            price = traderRow['price'].iloc[0]
            pnl = traderRow['pnl'].iloc[0]
            returnVal = traderRow['return'].iloc[0]

            # Create keys for returns and PnL dictionaries
            keyR = f"Price:{round(price * 1000, 2)}e-3 | Pnl:{round(pnl, 2)}"
            keyPnL = f"Price:{round(price * 1000, 2)}e-3 | Return:{round(returnVal, 2)}"

            # Calculate and store returns and PnL
            returns[keyR] = (pnl - fee) / (userVolume * price)
            pnls[keyPnL] = (returnVal * userVolume * price) + fee

        # If sortedRange is True, return sorted dictionaries
        if sortedRange:
            return dict(sorted(returns.items(), key=lambda item: item[1])), dict(sorted(pnls.items(), key=lambda item: item[1]))

        # Return unsorted returns and PnL dictionaries
        return returns, pnls

    else:
        # Calculate returns and PnL for the closest volume
        traderRow = data[(data['volume'] == closestVolume) & (data['trader'] == traderToEmulate)]
        fee = traderRow['fee'].iloc[0]
        price = traderRow['price'].iloc[0]
        pnl = traderRow['pnl'].iloc[0]
        returnVal = traderRow['return'].iloc[0]

        # Calculate and return returns and PnL
        return (pnl - fee) / (userVolume * price), (returnVal * userVolume * price) + fee


In [11]:
def graphReturnByTrader(data, traderToEmulate, userVolume, returnVal="returns", random=False):
    # Calculate expected returns and PnL for the trader and specified userVolume
    returns, pnls = expectedReturnByTrader(data, traderToEmulate, userVolume, True)

    if returnVal == "returns":
        # Calculate the 99% confidence interval for returns
        ranges = confidenceInterval(list(returns.values()), 99)

        # Create a bar plot for returns
        plt.bar(returns.keys(), returns.values(), color='skyblue', edgecolor='black')

        # Add a green line plot to the bar plot
        plt.plot(returns.keys(), returns.values(), color='green', marker='o')

        # Add labels with return values on top of the bars
        for i in range(len(returns.keys())):
            plt.text(list(returns.keys())[i], list(returns.values())[i], str(round(list(returns.values())[i], 2)), ha='center', va='bottom')

        # Add labels and a horizontal line for the confidence interval
        plt.xlabel('Price and PnL')
        plt.axhline(y=ranges[0], color='grey', linestyle='--')
        plt.axhline(y=ranges[1], color='red', linestyle='--')
        plt.title('Range of Possible Returns')

    elif returnVal == "pnls":
        # Calculate the 99% confidence interval for PnL
        ranges = confidenceInterval(list(pnls.values()), 99)

        # Create a bar plot for PnL
        plt.bar(pnls.keys(), pnls.values(), color='skyblue', edgecolor='black')

        # Add a green line plot to the bar plot
        plt.plot(pnls.keys(), pnls.values(), color='green', marker='o')

        # Add labels with PnL values on top of the bars
        for i in range(len(pnls.keys())):
            plt.text(list(pnls.keys())[i], list(pnls.values())[i], str(round(list(pnls.values())[i], 2)), ha='center', va='bottom')

        # Add labels and a horizontal line for the confidence interval
        plt.axhline(y=ranges[0], color='grey', linestyle='--')
        plt.axhline(y=ranges[1], color='red', linestyle='--')
        plt.xlabel('Price and Return')
        plt.title('Range of Possible P&L')

    else:
        # Raise an error for an invalid returnVal argument
        raise ValueError(f"Invalid argument '{returnVal}'. The acceptable arguments are: returns, pnls")

    # Rotate x-axis labels for better readability
    plt.xticks(rotation=90)

    # Display the plot
    plt.show()


In [12]:
def randomPredictions(data, traderToEmulate, userVolume, numSim=25):
    # Find the closest traders with similar volumes to the userVolume
    closestVolume = findClosestTrader(data, traderToEmulate, userVolume, True)
    print(closestVolume)

    # Generate random samples of trader volumes using bootstrap
    randomSamples = bootstrap(closestVolume, numSim)
    
    # Initialize lists to store fees, PnLs, prices, and return values
    fees = []
    pnls = []
    prices = []
    returnVals = []
    
    for volume in closestVolume:
        traderRow = df[(df['volume'] == volume) & (df['trader'] == traderToEmulate)]
        fees.append(traderRow['fee'].iloc[0])
        prices.append(traderRow['price'].iloc[0])
        pnls.append(traderRow['pnl'].iloc[0])
        returnVals.append(traderRow['return'].iloc[0])

    # Create directories for storing prediction results
    traderDirectory = './Plots/' + traderToEmulate + "/"
    if not os.path.exists(traderDirectory):
        os.mkdir(traderDirectory)

    predDirectory = traderDirectory + "Random Predictions/"
    if not os.path.exists(predDirectory):
        os.mkdir(predDirectory)

    directoryPathReturns = predDirectory + "Returns/"
    directoryPathPnLs = predDirectory + 'PnLs/'

    # Clear existing files in the Returns and PnLs directories
    if os.path.exists(directoryPathReturns):
        for file_name in os.listdir(directoryPathReturns):
            file_path = os.path.join(directoryPathReturns, file_name)
            if os.path.isfile(file_path):
                os.remove(file_path)
    else:
        os.mkdir(directoryPathReturns)

    if os.path.exists(directoryPathPnLs):
        for file_name in os.listdir(directoryPathPnLs):
            file_path = os.path.join(directoryPathPnLs, file_name)
            if os.path.isfile(file_path):
                os.remove(file_path)
    else:
        os.mkdir(directoryPathPnLs)

    for j, randomSample in enumerate(randomSamples):
        returns = {}
        pnlsD = {}

        for i, volume in enumerate(randomSample):
            keyR = f"Price:{round(prices[i] * 1000, 2)}e-3 | PnL:{round(pnls[i], 2)}"
            keyPnL = f"Price:{round(prices[i] * 1000, 2)}e-3 | Return:{round(returnVals[i], 2)}"
            returns[keyR] = (pnls[i] - fees[i]) / (volume * prices[i])

            # Write return values to a text file
            with open(directoryPathReturns + str(j + 1) + ".txt", 'a') as file:
                file.write(keyR + " = " + str(returns[keyR]) + "\n")

            pnlsD[keyPnL] = (returnVals[i] * volume * prices[i]) + fees[i]

            # Write PnL values to a text file
            with open(directoryPathPnLs + str(j + 1) + ".txt", 'a') as file:
                file.write(keyPnL + " = " + str(pnlsD[keyPnL]) + "\n")

        # Calculate and write confidence intervals to the text files
        ranges = confidenceInterval(list(returns.values()), 99)
        with open(directoryPathReturns + str(j + 1) + ".txt", 'a') as file:
            file.write(str(ranges) + "\n")

        # Create and save bar plots for returns
        plt.bar(returns.keys(), returns.values(), color='skyblue', edgecolor='black')
        plt.plot(returns.keys(), returns.values(), color='green', marker='o')
        for i in range(len(returns.keys())):
            plt.text(list(returns.keys())[i], list(returns.values())[i], str(round(list(returns.values())[i], 2)),
                     ha='center', va='bottom')
        plt.axhline(y=ranges[0], color='grey', linestyle='--')
        plt.axhline(y=ranges[1], color='red', linestyle='--')
        plt.xlabel('Price and PnL')
        plt.title('Range of Possible Returns')
        plt.xticks(rotation=90)
        plt.savefig(directoryPathReturns + str(j + 1) + ".png", bbox_inches='tight')
        plt.clf()

        # Calculate and write confidence intervals to the text files
        ranges = confidenceInterval(list(pnlsD.values()), 99)
        with open(directoryPathPnLs + str(j + 1) + ".txt", 'a') as file:
            file.write(str(ranges) + "\n")

        # Create and save bar plots for PnLs
        plt.bar(pnlsD.keys(), pnlsD.values(), color='skyblue', edgecolor='black')
        plt.plot(pnlsD.keys(), pnlsD.values(), color='green', marker='o')
        for i in range(len(pnlsD.keys())):
            plt.text(list(pnlsD.keys())[i], list(pnlsD.values())[i], str(round(list(pnlsD.values())[i], 2)),
                     ha='center', va='bottom')
        plt.axhline(y=ranges[0], color='grey', linestyle='--')
        plt.axhline(y=ranges[1], color='red', linestyle='--')
        plt.xlabel('Price and Return')
        plt.title('Range of Possible P&L')
        plt.xticks(rotation=90)
        plt.savefig(directoryPathPnLs + str(j + 1) + ".png", bbox_inches='tight')
        plt.clf()

In [14]:
def pdfReturns(data, traderToEmulate, direct=False, focus=False, reduce=False):
    # Sort the data if 'direct' flag is set to True, otherwise sort trader-specific returns
    if direct:
        returns = sorted(data)
    else:
        returns = sorted(data[data["trader"] == traderToEmulate]["return"].values.tolist())

    # Calculate the 90% confidence interval for the returns
    interval = confidenceInterval(returns, 90)

    # If 'focus' flag is True, consider only returns within the 99.99999999999% confidence interval
    if focus:
        interval = confidenceInterval(returns, 90)
        intervalOverall = confidenceInterval(returns, 99.99999999999)
        returns = [value for value in returns if intervalOverall[0] <= value <= intervalOverall[1]]

    # If 'reduce' flag is True, reduce returns by including values in the tails
    if reduce:
        lower = list(np.linspace(min(returns), interval[0], int(len(returns) * 0.2)))
        higher = list(np.linspace(interval[1], max(returns), int(len(returns) * 0.2)))
        returns = returns + lower + higher
        returns = sorted(returns)

    # Use Gaussian Kernel Density Estimation (KDE) to estimate the Probability Density Function (PDF)
    kde = stats.gaussian_kde(returns)
    pdf_values = kde(returns)

    # Create a dictionary to store return values and their corresponding PDF values
    dic = {}
    for i in range(len(returns)):
        dic[returns[i]] = pdf_values[i]

    # Plot the PDF and highlight the area under the PDF within the confidence interval
    plt.plot(returns, pdf_values, label='PDF')
    plt.fill_between(returns, pdf_values, where=(returns >= interval[0]) & (returns <= interval[1]), color='grey', alpha=0.5, label='Area under the PDF')
    plt.xlabel('Returns')
    plt.ylabel('Probability Density')
    plt.title('Probability Density Function of Trader Returns')
    plt.legend()
    plt.grid()
    plt.show()

    # Return the trader's name and the PDF dictionary
    return traderToEmulate, dic

In [15]:
def pdfPredictions(data, dic, traderToEmulate, userVolume):
    # Initialize an empty dictionary to store P&L values
    pnls = {}

    # Iterate through the keys (returns) and values (PDF values) in the 'dic' dictionary
    for key, value in dic.items():
        # Find the corresponding row in the data for the given return value and trader
        traderRow = df[(df['return'] == key) & (df['trader'] == traderToEmulate)]

        # Check if the traderRow is empty (no matching data found)
        if traderRow.empty:
            continue  # Skip to the next iteration if no data is found
        else:
            # Calculate the P&L (profit and loss) based on the return value and user's volume
            # The key is multiplied by userVolume and (1 / 100) to convert from percentage return to P&L
            pnls[key * userVolume * (1 / 100)] = value

    # Calculate the expected value by summing the product of P&L values and their corresponding keys (returns)
    expected_value = np.sum(np.array(list(pnls.keys())) * np.array(list(pnls.values())))

    # Return the calculated expected value
    return expected_value


In [28]:
def calculate_var_monte_carlo(data, trader, confidence_level=0.05, num_simulations=10000):
    returns = data[data["trader"] == trader]["return"].values.tolist()
    # Calculate the sample size for each simulation
    sample_size = len(returns)
    # Perform Monte Carlo simulation
    simulated_returns = np.random.choice(returns, size=(num_simulations, sample_size), replace=True)
    # Calculate the portfolio values for each simulation
    portfolio_values = np.cumprod(1 + simulated_returns, axis=1)
    # Sort the portfolio values to find the VaR
    sorted_values = np.sort(portfolio_values[:, -1])
    # Calculate the index corresponding to the confidence level
    confidence_index = int(confidence_level * num_simulations)
    # Calculate VaR as the negative value at the confidence index
    var = -sorted_values[confidence_index]
    return var

# Calculate the 5% VaR using Monte Carlo simulation with 10,000 simulations
var_5_monte_carlo = calculate_var_monte_carlo(df, "0xbbd2498a9e42af43062c1de268c8413601e1f8e4", confidence_level=0.95, num_simulations=10000)

print("5% VaR (Monte Carlo):", var_5_monte_carlo)

5% VaR (Monte Carlo): -9.46803720168154e+17


In [44]:
def calculate_cvar_monte_carlo(data, trader, confidence_level=0.05, num_simulations=10000):
    returns = data[data["trader"] == trader]["return"].values.tolist()
    # Calculate the sample size for each simulation
    sample_size = len(returns)
    # Perform Monte Carlo simulation
    simulated_returns = np.random.choice(returns, size=(num_simulations, sample_size), replace=True)
    # Calculate the portfolio values for each simulation
    portfolio_values = np.cumprod(1 + simulated_returns, axis=1)
    # Sort the portfolio values to find the VaR
    sorted_values = np.sort(portfolio_values[:, -1])
    # Calculate the index corresponding to the confidence level
    confidence_index = int(confidence_level * num_simulations)
    # Calculate VaR as the negative value at the confidence index
    var = -sorted_values[confidence_index]
    # Calculate CVaR as the average of losses beyond VaR
    cvar = -np.mean(sorted_values[:confidence_index])
    return cvar

# Calculate the 5% CVaR using Monte Carlo simulation with 10,000 simulations
cvar_5_monte_carlo = calculate_cvar_monte_carlo(df, "0xbbd2498a9e42af43062c1de268c8413601e1f8e4", confidence_level=0.05, num_simulations=10000)

print("5% CVaR (Monte Carlo):", cvar_5_monte_carlo)


5% CVaR (Monte Carlo): 3.1535247671492875e+21
