In [3]:
import csv
import statistics
import numpy as np
from datetime import datetime
from collections import defaultdict
from scipy.stats import kurtosis, skew, linregress
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
import matplotlib.dates as mdates
import os
import random
import csv

In [4]:
df = pd.read_csv('./datasets/98 final.csv')
# df = pd.read_csv('./datasets/98 gains final.csv')
# df = pd.read_csv('./datasets/98 mux.csv')
df = df.loc[:, df.columns.intersection(["block_time", "address", "volume", "fee", "pnl", "return", "liquidation"])]
# openDF = 'unfiltered_open_and_close.csv'
# collateralDF = 'gmx_collateral_data.csv'
df["block_time"] = df["block_time"].apply(lambda x: x[:10])

In [5]:
def normalize_values(values):
    min_value, max_value = min(values), max(values)
    normalized_values = [(value - min_value) / (max_value - min_value) for value in values]
    return normalized_values

def generate_weights(n, min_value=0.0, max_value=1.0):
    if min_value * n > 1 or max_value * n < 1:
        return "Weights are not feasible."

    remaining_weights = 1
    weights = []

    for _ in range(n - 1):
        weight = random.uniform(min_value, min(remaining_weights, max_value))
        weights.append(weight)
        remaining_weights -= weight

    weights.append(remaining_weights)  # Add the last weight to ensure they sum to 1
    random.shuffle(weights)  # Shuffle the weights to randomize their order

    return weights

In [6]:
def regressionModel(tradesData, tradersData):
    
    cumulativeReturnTraders = {};
    
    for i in range(len(dfGrade)):
        address = dfGrade.iloc[i]['Unnamed: 0'];
        tradeReturn = dfTrade.loc[dfTrade['address'] == address]['return'].sum();
        cumulativeReturnTraders[tradeReturn] = [dfGrade.iloc[i]['Distribution Test'], dfGrade.iloc[i]['Normal Test'], dfGrade.iloc[i]['Risk Test'], dfGrade.iloc[i]['Profitability Test']];
    
    y = [key for key in cumulativeReturnTraders.keys()];
    
    XDist = np.array([item[0] for key, item in cumulativeReturnTraders.items()]).reshape(-1, 1);
    XDist_train, XDist_test, y_train, y_test = train_test_split(XDist, y, test_size=0.3, random_state=1)
    regDist = linear_model.LinearRegression()
    regDist.fit(XDist_train, y_train)
    scoreDist = mean_absolute_error(regDist.predict(XDist_test), y_test)
    
    XNorm = np.array([item[1] for key, item in cumulativeReturnTraders.items()]).reshape(-1, 1);
    XNorm_train, XNorm_test, y_train, y_test = train_test_split(XNorm, y, test_size=0.3, random_state=1)
    regNorm = linear_model.LinearRegression()
    regNorm.fit(XNorm_train, y_train)
    scoreNorm = mean_absolute_error(regNorm.predict(XNorm_test), y_test)
    
    XRisk = np.array([item[2] for key, item in cumulativeReturnTraders.items()]).reshape(-1, 1);
    XRisk_train, XRisk_test, y_train, y_test = train_test_split(XRisk, y, test_size=0.3, random_state=1)
    regRisk = linear_model.LinearRegression()
    regRisk.fit(XRisk_train, y_train)
    scoreRisk = mean_absolute_error(regRisk.predict(XRisk_test), y_test)
    
    XProf = np.array([item[3] for key, item in cumulativeReturnTraders.items()]).reshape(-1, 1);
    XProf_train, XProf_test, y_train, y_test = train_test_split(XProf, y, test_size=0.3, random_state=1)
    regProf = linear_model.LinearRegression()
    regProf.fit(XProf_train, y_train)
    scoreProf = mean_absolute_error(regProf.predict(XProf_test), y_test)
    
    if min(scoreDist, scoreNorm, scoreRisk, scoreProf) == scoreDist:
        return "Distribution Test is the best for the given data";
    elif min(scoreDist, scoreNorm, scoreRisk, scoreProf) == scoreNorm:
        return "Normal Test is the best for the given data";
    elif min(scoreDist, scoreNorm, scoreRisk, scoreProf) == scoreRisk:
        return "Risk Test is the best for the given data";
    else:
        return "Profitability Test is the best for the given data";

In [7]:
class RegressionTest:
    
    def __init__(self, data):
        self.data = data
    
    def calculate_regression_coefficient(self, address):
        df_filtered = self.data[self.data["address"] == address]
        x = np.arange(1, len(df_filtered) + 1)
        y = np.cumsum(df_filtered['return'])
        slope, intercept, r_value, p_value, std_err = linregress(x, y)
        normalized_slope = (np.rad2deg(np.arctan(slope)) + 90) / 180
        coefficient = (r_value ** 2 * normalized_slope) / (std_err + 1)
        coefficient /= (1 - p_value) if 1 - p_value != 0 else 1
        return coefficient

    def Normalized_Regression_coefficient(self):
        addresses = self.data["address"].unique()
        regression_coefficient_values = [self.calculate_regression_coefficient(address) for address in addresses]
        normalized_regression_coefficient_values = normalize_values(regression_coefficient_values)
        return dict(zip(addresses, normalized_regression_coefficient_values))
    
    def addToCSV(self, file):
        data = []
        NRC = self.Normalized_Regression_coefficient()
        for i in range(len(NRC)):
            dic = {}
            dic["Address"] = list(NRC.keys())[i]
            dic["Normalized Regression Coefficient"] = list(NRC.values())[i]
            data.append(dic)
        with open(file, 'w', newline='') as file:
            writer = csv.DictWriter(file, fieldnames=data[0].keys())
            writer.writeheader()
            writer.writerows(data)
            
RT = RegressionTest(df)

In [8]:
class RiskTest:
    
    def __init__(self, data):
        self.data = data
    
    def calculate_negative_CV_returns(self, address):
        returns = self.data[self.data["address"] == address]["return"].values.tolist()
        negative_returns = [r for r in returns if r < 0]
        mean_negative_returns = np.mean(negative_returns)
        std_dev_negative_returns = np.std(negative_returns)
        cv_negative_returns = std_dev_negative_returns / mean_negative_returns
        return cv_negative_returns
        
    def Normalized_CV_negative_returns(self):
        addresses = self.data["address"].unique()
        cv_negative_returns = [self.calculate_negative_CV_returns(address) for address in addresses]
        normalized_cv_negative_returns = normalize_values(cv_negative_returns)
        return dict(zip(addresses, normalized_cv_negative_returns))

    def calculate_var(self, address, confidence_level):
        df_filtered = self.data[self.data["address"] == address]
        returns = np.array(df_filtered["return"])
        sorted_returns = np.sort(returns)
        index = int(np.floor(confidence_level * len(sorted_returns)))
        var = sorted_returns[index]
        return var

    def Normalized_VaR(self, confidence_level=0.05):
        addresses = self.data["address"].unique()
        var_values = [self.calculate_var(address, confidence_level) for address in addresses]
        normalized_var_values = normalize_values(var_values)
        return dict(zip(addresses, normalized_var_values))
    
    def calculate_cvar(self, address, confidence_level):
        df_filtered = self.data[(self.data["address"] == address) & (self.data["pnl"] < 0)]
        returns = np.array(df_filtered["return"])
        cvar = np.nan if len(returns) == 0 else np.mean(returns)
        return cvar

    def Normalized_cVaR(self, confidence_level=0.05):
        addresses = self.data["address"].unique()
        cvar_values = [self.calculate_cvar(address, confidence_level) for address in addresses]
        normalized_cvar_values = normalize_values(cvar_values)
        return dict(zip(addresses, normalized_cvar_values))
    
    def calculate_liquidation_chance(self, address):
        df_filtered = self.data[self.data["address"] == address]
        total_trades = len(df_filtered)
        liquidation_counts = df_filtered["liquidation"].apply(lambda x: 1 if x == "Yes" else 0).sum()
        liquidation_chance = liquidation_counts / total_trades
        return liquidation_chance

    def Normalized_Liquidation_chance(self):
        addresses = self.data["address"].unique()
        liquidation_chance_values = [self.calculate_liquidation_chance(address) for address in addresses]
        normalized_liquidation_chance_values = normalize_values(liquidation_chance_values)    
        return dict(zip(addresses, normalized_liquidation_chance_values))
    
    def calculate_max_drawdown(self, address):
        filtered_data = self.data[self.data['address'] == address]
        cumulative_returns = np.cumprod(1 + filtered_data['return']) - 1
        peaks = np.maximum.accumulate(cumulative_returns)
        troughs = np.minimum.accumulate(cumulative_returns)
        drawdowns = peaks - troughs
        max_drawdown = np.max(drawdowns)
        return max_drawdown
    
    def Normalized_Max_Drawdown(self):
        values = normalize_values([self.calculate_max_drawdown(address) for address in self.data["address"].unique()])
        risk_df = {}
        for i, address in enumerate(self.data["address"].unique()):
            risk_df[address] = values[i]
        return risk_df
    
    def addToCSV(self, file):
        data = []
        NCVNR = self.Normalized_CV_negative_returns()
        NV = self.Normalized_VaR()
        NCV = self.Normalized_cVaR()
        NLC = self.Normalized_Liquidation_chance()
        current = 0
        weights = [0.3, 0.3, 0.3, 0.1]
        dic = {}
        for i in range(len(NV)):
            dic["Address"] = list(NV.keys())[i]
            dic["Normalized VaR"] = list(NV.values())[i]
            dic["Normalized CVaR"] = list(NCV.values())[i]
            dic["Normalized Liquidation Chance"] = list(NLC.values())[i]
            dic["Weighted"] = list(NV.values())[i] * weights[1] + list(NCV.values())[i] * weights[2] + list(NLC.values())[i] * weights[0]
        with open(file, 'w', newline='') as file:
            writer = csv.DictWriter(file, fieldnames=dic[0].keys())
            writer.writeheader()
            writer.writerows(dic)

    def runMultiple(self, simulations=100):
        NCVNR = self.Normalized_CV_negative_returns()
        NV = self.Normalized_VaR()
        NCV = self.Normalized_cVaR()
        NLC = self.Normalized_Liquidation_chance()
        data = []
        for j in range(simulations):
            weights = generate_weights(4, 0.20, 0.45)
            dic = {}
            for key in NCV.keys():
                dic[key] = NCVNR[key] * weights[0] + NV[key] * weights[1] + NCV[key] * weights[2] + NLC[key] * weights[3]
            data.append(dic)
        drawdowns = self.Normalized_Max_Drawdown()
        drawdowns = {key: drawdowns[key] for key in data[0]}
        curr, currentMinimumMSE = None, float('inf')
        for nvs in data:
            totalSquaredError = sum([(nvs[key] - drawdowns[key]) ** 2 for key, value in nvs.items()])
            meanSquaredError = totalSquaredError / len(nvs) 
            if meanSquaredError < currentMinimumMSE: currentMinimumMSE, curr = meanSquaredError, nvs
        print(weights)
        
    def rankWeights(self):
        NCVNR = self.Normalized_CV_negative_returns()
        NV = self.Normalized_VaR()
        NCV = self.Normalized_cVaR()
        NLC = self.Normalized_Liquidation_chance()
        drawdowns = self.Normalized_Max_Drawdown()
        drawdowns = {key: drawdowns[key] for key in NV.keys()}
        
        dictionary = {}
        dictionary["NCVNR"] = sum([(NCVNR[key] - drawdowns[key]) ** 2 for key in NCVNR.keys()]) / len(NCVNR)
        dictionary["NV"] = sum([(NV[key] - drawdowns[key]) ** 2 for key in NV.keys()]) / len(NV)
        dictionary["NCV"] = sum([(NCV[key] - drawdowns[key]) ** 2 for key in NCV.keys()]) / len(NCV)
        dictionary["NLC"] = sum([(NLC[key] - drawdowns[key]) ** 2 for key in NLC.keys()]) / len(NLC)
        dictionary = dict(sorted(dictionary.items(), key=lambda item: item[1]))
        
        for i, item in enumerate(dictionary.items()):
            print(f"{i + 1} - {item[0]}:{item[1]}")
        

RT = RiskTest(df)
RT.rankWeights()
# RT.addToCSV("Risk Test.csv")

1 - NLC:0.047993521956239016
2 - NCVNR:0.4523533051742929
3 - NCV:0.7907727419680378
4 - NV:0.8839590500515858


In [12]:
class ProfitabilityTest:
    
    def __init__(self, data):
        self.data = data
        
    def calculate_pnltovolume(self, address):
        df_filtered = self.data[self.data["address"] == address]
        if len(df_filtered) == 0: return None
        total_pnl = sum(df_filtered['pnl'])
        total_volume = sum(df_filtered['volume'])    
        return total_pnl / total_volume

    def Normalized_Pnltovolume_y(self):
        addresses = self.data['address'].unique()
        pnltovolume_values = [self.calculate_pnltovolume(address) for address in addresses]
        normalized_pnltovolume_values = normalize_values(pnltovolume_values)
        return dict(zip(addresses, normalized_pnltovolume_values))
    
    def calculate_pnltocollateral(self, address):
        grouped = self.data.groupby("address").agg({"pnl": "sum", "volume": "sum"})
        grouped["pnltocollateral"] = grouped["pnl"] / grouped["volume"]
        return grouped.loc[address, "pnltocollateral"]

    def Normalized_Pnltocollateral(self):
        addresses = self.data["address"].unique()
        pnltocollateral_values = [self.calculate_pnltocollateral(address) for address in addresses]
        normalized_pnltocollateral_values = normalize_values(pnltocollateral_values)
        return dict(zip(addresses, normalized_pnltocollateral_values))
    
    def calculate_expected_return(self, address):
        expected_return = self.data[self.data["address"] == address]["return"].mean()
        return expected_return

    def Normalized_Expected_return_y(self):
        addresses = self.data["address"].unique()
        expected_return_values = [self.calculate_expected_return(address) for address in addresses]
        normalized_expected_return_values = normalize_values(expected_return_values)
        return dict(zip(addresses, normalized_expected_return_values))
    
    def calculate_sharpe_ratio(self, address, risk_free_rate=0.02):
        filtered_data = self.data[self.data['address'] == address]
        excess_returns = filtered_data['return'] - risk_free_rate
        sharpe_ratio = np.mean(excess_returns) / np.std(excess_returns)
        return sharpe_ratio
    
    def Normalized_Sharpe_Ratio(self):
        values = normalize_values([self.calculate_sharpe_ratio(address) for address in self.data["address"].unique()])
        profitability_df = {}
        for i, address in enumerate(self.data["address"].unique()):
            profitability_df[address] = values[i]
        return profitability_df
    
    def addToCSV(self, file):
        data = []
        NPV = self.Normalized_Pnltovolume_y()
        NPC = self.Normalized_Pnltocollateral()
        NER = self.Normalized_Expected_return_y()
        current = 0
        weights = [0.3, 0.3, 0.4]
        for i in range(len(NPV)):
            dic = {}
            dic["Address"] = list(NPV.keys())[i]
            dic["Normalized PNL To Volume"] = list(NPV.values())[i]
            dic["Normalized PNL To Collateral"] = list(NPC.values())[i]
            dic["Normalized Expected Return"] = list(NER.values())[i]
            dic["Weighted Value"] = list(NPV.values())[i] * weights[0] + list(NPC.values())[i] * weights[1] + list(NER.values())[i] * weights[2]
            data.append(dic)
        with open(file, 'w', newline='') as file:
            writer = csv.DictWriter(file, fieldnames=data[0].keys())
            writer.writeheader()
            writer.writerows(data)
    
    def runMultiple(self, simulations=100):
        NPV = self.Normalized_Pnltovolume_y()
        NPC = self.Normalized_Pnltocollateral()
        NER = self.Normalized_Expected_return_y()
        data = []
        for j in range(simulations):
            weights = generate_weights(3, 0.275, 0.425)
            dic = {}
            for key in NPV.keys():
                dic[key] = NPV[key] * weights[0] + NPC[key] * weights[1] + NER[key] * weights[2]
            data.append(dic)
        sharpeRatios = self.Normalized_Sharpe_Ratio()
        sharpeRatios = {key: sharpeRatios[key] for key in data[0]}
        curr, currentMinimumMSE = None, float('inf')
        for nvs in data:
            totalSquaredError = sum([(nvs[key] - sharpeRatios[key]) ** 2 for key, value in nvs.items()])
            meanSquaredError = totalSquaredError / len(nvs) 
            if meanSquaredError < currentMinimumMSE: currentMinimumMSE, curr = meanSquaredError, nvs
        print(weights)
        
    def rankWeights(self):
        NPV = self.Normalized_Pnltovolume_y()
        NPC = self.Normalized_Pnltocollateral()
        NER = self.Normalized_Expected_return_y()
        sharpeRatios = self.Normalized_Sharpe_Ratio()
        sharpeRatios = {key: sharpeRatios[key] for key in NPV.keys()}
        
        dictionary = {}
        dictionary["NPV"] = sum([(NPV[key] - sharpeRatios[key]) ** 2 for key in NPV.keys()]) / len(NPV)
        dictionary["NPC"] = sum([(NPC[key] - sharpeRatios[key]) ** 2 for key in NPC.keys()]) / len(NPC)
        dictionary["NER"] = sum([(NER[key] - sharpeRatios[key]) ** 2 for key in NER.keys()]) / len(NER)
        dictionary = dict(sorted(dictionary.items(), key=lambda item: item[1]))
        
        for i, item in enumerate(dictionary.items()):
            print(f"{i + 1} - {item[0]}:{item[1]}")
            
            
PT = ProfitabilityTest(df)
PT.rankWeights()
PT.addToCSV("Profitability Test.csv")

1 - NPV:0.021273964493066882
2 - NPC:0.021273964493066892
3 - NER:0.028792707927005372


In [10]:
def calculate_max_drawdown(data, address):
    filtered_data = data[data['address'] == address]
    cumulative_returns = np.cumprod(1 + filtered_data['return']) - 1
    peaks = np.maximum.accumulate(cumulative_returns)
    troughs = np.minimum.accumulate(cumulative_returns)
    drawdowns = peaks - troughs
    max_drawdown = np.max(drawdowns)
    return max_drawdown

risk_df = {"address":[], "Max Drawdown":[]}
for address in addresses:
    risk_df["address"].append(address)
    risk_df["Max Drawdown"].append(calculate_max_drawdown(df, address))
risk_df["Max Drawdown"] = normalize_values(risk_df["Max Drawdown"])
# risk_df = pd.DataFrame(risk_df)
# risk_df.to_csv('Risk.csv', index=False)

NameError: name 'addresses' is not defined

In [None]:
df['block_time'] = pd.to_datetime(df['block_time'])
average_monthly_returns = df.groupby(['address', df['block_time'].dt.to_period('M')])['return'].mean()
# std_dev_returns = df.groupby(['address', df['block_time'].dt.to_period('M')])['return'].std()
# avg_returns_factor_std = average_monthly_returns / std_dev_returns
# avg_returns_factor_std
dfTemp = pd.DataFrame({'address': average_monthly_returns.index.get_level_values('address'),
                              'Month': average_monthly_returns.index.get_level_values('block_time'),
                              'Average Income': average_monthly_returns})
# dfTemp = pd.read_csv('Regression.csv')
addresses = dfTemp["address"].unique()
regression_df = {"address" : addresses, "Average Monthly" : []}
regression_df["Average Monthly"] = normalize_values([dfTemp[dfTemp["address"] == address]["Average Income"].mean() for address in addresses])
# regression_df = pd.DataFrame(regression_df)
# regression_df.to_csv('Regression.csv', index=False)

In [None]:
data = {}
data.update(profitability_df)
data.update(risk_df)
data.update(regression_df)
data = pd.DataFrame(data)
data.to_csv('Combined.csv', index=False)

In [None]:
df = pd.read_csv('Combined.csv')
df['z_max_drawdown'] = (df['Max Drawdown'] - df['Max Drawdown'].mean()) / df['Max Drawdown'].std()
df['z_sharpe_ratio'] = (df['Sharpe Ratio'] - df['Sharpe Ratio'].mean()) / df['Sharpe Ratio'].std()
df['z_avg_monthly_returns'] = (df['Average Monthly'] - df['Average Monthly'].mean()) / df['Average Monthly'].std()

weights = generate_weights(3, 0.3, 0.4)

# Calculate weighted sum of standardized metrics
df['weighted_sum'] = (weights[0] * df['z_max_drawdown']) + (weights[1] * df['z_sharpe_ratio']) + (weights[2] * df['z_avg_monthly_returns'])

# df['rank'] = df['weighted_sum'].rank(ascending=False)
# sorted_addresses = df.sort_values('rank')['address'].tolist()
# sorted_addresses

In [None]:
# dfPT = pd.read_csv("Profitability Test.csv")
# PTDict = {address:dfPT[dfPT["Address"] == address]["Weighted Value"] for address in dfPT["Address"].unique()}
# SRDict = {address:df[df["address"] == address]["weighted_sum"] for address in df["address"].unique()}
# SRDict = {key: SRDict[key] for key in PTDict}

In [None]:
# # Given target values as a dictionary
# target_values = {"A": 10, "B": 20, "C": 30, "D": 40, "E": 50}

# # Simulated calculated values from multiple iterations
# calculated_values = [
#     {"A": 12, "B": 18, "C": 28, "D": 43, "E": 55},
#     {"A": 11, "B": 21, "C": 29, "D": 39, "E": 49},
#     {"A": 9, "B": 19, "C": 32, "D": 42, "E": 52},
#     {"A": 10, "B": 20, "C": 30, "D": 40, "E": 50},  # Closest to the target values
#     {"A": 14, "B": 22, "C": 31, "D": 37, "E": 45}
# ]

# # Calculate the sum of absolute differences for each calculated value
# curr, currentMinimumMSE = None, float('inf')
# for calculated in calculated_values:
#     totalSquaredError = sum([(calculated[key] - target_values[key]) ** 2 for key, value in calculated.items()])
#     meanSquaredError = totalSquaredError / len(calculated)  # Calculate the mean
#     if meanSquaredError < currentMinimumMSE: currentMinimumMSE, curr = meanSquaredError, calculated

# print("Closest calculated value:", curr)


In [None]:
min(generate_weights(3, 0.19, 0.36))