# Analysis of financial performance

# Modules

In [None]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import pandas as pd
import os, re

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.float_format', '{:.20f}'.format)

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
import re
from matplotlib.dates import DateFormatter
import matplotlib.dates as mdates
from scipy import stats
from scipy.stats import ttest_ind


from datetime import datetime, timedelta

from tqdm import tqdm
import numpy as np
import random
from scipy.stats import pearsonr
from statsmodels.tsa.stattools import adfuller, coint, acf
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')



In [None]:
from google.colab import drive
import os
import shutil

# Mount Google Drive at /content/drive (requires the Google Colab runtime)
drive.mount('/content/drive')


In [None]:
cd /content/drive/MyDrive/MSC_YORK/PROJECT/

In [None]:
pwd

# Pre-processing

## Collect metrics

In [None]:
back_testing_results = pd.read_csv("BACKTESTING/back_testing_results.csv")

# Keep only the rows where both algorithms report a strictly positive annualized volatility.
has_positive_volatility = (
    (back_testing_results['ai_algo-annualized_volatility'] > 0)
    & (back_testing_results['standard_algo-annualized_volatility'] > 0)
)
# .copy() makes the filtered frame independent of the frame read from disk, so the
# column assignments made in later cells never act on a slice of another frame.
back_testing_results = back_testing_results.loc[has_positive_volatility].copy()
back_testing_results.head(10)


In [None]:
back_testing_results[['ai_algo-annualized_return','standard_algo-annualized_return']].tail()

## Final dataset

In [None]:
# Store max drawdown as a magnitude (absolute value) for both algorithms.
for drawdown_col in ('ai_algo-max_drawdown', 'standard_algo-max_drawdown'):
    back_testing_results[drawdown_col] = back_testing_results[drawdown_col].abs()

In [None]:
# Store fees as a magnitude (absolute value) for both algorithms.
for fees_col in ('ai_algo-fees', 'standard_algo-fees'):
    back_testing_results[fees_col] = back_testing_results[fees_col].abs()

In [None]:
# Boolean flag: True where the AI algorithm made at least one trade.
back_testing_results['ai_algo-num_trades_is_positive'] = back_testing_results['ai_algo-num_trades'].gt(0)

In [None]:
back_testing_results.head()

In [None]:
# Drop every column whose name contains one of these error-metric tokens.
excluded_tokens = ('rmse', 'mse', 'mae', 'r2', 'mape')
columns = [
    col
    for col in back_testing_results.columns
    if not any(token in col for token in excluded_tokens)
]
columns
back_testing_results = back_testing_results[columns]

In [None]:
scope_mae = pd.read_csv("EXPLORATIVE_DATA_ANALYSIS/scope.csv")
scope_mae
# Restrict the analysis to the first four rows of the scope file, excluding XRPUSDT as coin1.
first_four_rows = scope_mae.head(4)
scope = first_four_rows.query("coin1 !='XRPUSDT'")

In [None]:
scope

In [None]:
# Inner join: keep only the back-testing rows whose (coin1, coin2) pair is in scope.
back_testing_results = pd.merge(
    back_testing_results,
    scope,
    how='inner',
    on=['coin1', 'coin2'],
)

# Functions

In [None]:
def describe(df, ai_col, standard_col):
    """
    Build a two-row table of descriptive statistics, one row per metric column.

    Parameters:
    ----------
    df : pandas.DataFrame
        The DataFrame containing the metrics.
    ai_col : str
        The name of the column with AI model's metrics.
    standard_col : str
        The name of the column with standard model's metrics.

    Returns:
    -------
    pandas.DataFrame
        The output of ``Series.describe()`` for each column, laid out as one
        row per column and indexed by ``[ai_col, standard_col]``.
    """
    # describe() returns a Series; turn each one into a single-row frame.
    summary_rows = [
        df[column_name].describe().to_frame().T
        for column_name in (ai_col, standard_col)
    ]

    comparison_table = pd.concat(summary_rows, axis=0)
    comparison_table.index = [ai_col, standard_col]
    return comparison_table

def find_max_rejection_threshold(df, col, pace=0.01):
    """
    Find the maximum threshold where the null hypothesis (H0) can be rejected.

    Starting from a threshold of 0, the threshold is raised in steps of ``pace``
    and a one-sided ("greater") one-sample test is run at each step: a t-test
    when the Shapiro-Wilk test does not reject normality at the 5% level, a
    Wilcoxon signed-rank test otherwise. The search stops at the first threshold
    where H0 is not rejected and the last rejected threshold is returned.

    Parameters:
    ----------
    df : pandas.DataFrame
        The DataFrame containing the data.
    col : str
        The name of the column to test.
    pace : float, optional
        Positive increment applied to the threshold between tests (default 0.01).

    Returns:
    -------
    max_threshold : float
        The maximum tested threshold where the null hypothesis can still be
        rejected. It is 0 when H0 is not rejected even at threshold 0.

    Raises:
    ------
    ValueError
        If ``pace`` is not strictly positive (the search could never advance).
    """
    # Local import keeps the function independent of whatever the global
    # name `stats` is bound to when it is called.
    import scipy.stats as stats

    if not pace > 0:
        raise ValueError(f"pace must be strictly positive, got {pace!r}")

    # Significance level
    alpha = 0.05
    sample = df[col]

    # Check normality of the data once; it does not depend on the threshold
    _, p_value_col1 = stats.shapiro(sample)
    print(f'P-value for normality test on {col}: {p_value_col1}')
    normally_distributed = p_value_col1 > alpha

    max_threshold = 0
    step = 0

    # Keep raising the threshold until the null hypothesis is no longer rejected
    while True:
        # step * pace has a single rounding error, unlike repeated `+= pace`,
        # so the reported threshold does not drift; step 0 stays the integer 0.
        threshold = step * pace if step else 0

        if normally_distributed:
            # One-sample t-test for normally distributed data
            p_value = stats.ttest_1samp(a=sample, popmean=threshold, alternative='greater').pvalue
        else:
            # Wilcoxon signed-rank test for non-normally distributed data
            _, p_value = stats.wilcoxon(sample - threshold, alternative='greater')

        # A NaN p-value also ends the search here
        if not p_value < alpha:
            break

        max_threshold = threshold
        step += 1

    print(f"The maximum threshold where the null hypothesis is rejected: {max_threshold}")
    return max_threshold



def t_test(df, col, threshold):
    """
    Run a one-sided ("greater") one-sample test of a column against a threshold.

    The Shapiro-Wilk test decides which test is used at the 5% level: a
    one-sample t-test on the mean when normality is not rejected, otherwise a
    Wilcoxon signed-rank test on ``df[col] - threshold``. Intermediate results
    are printed.

    Parameters:
    ----------
    df : pandas.DataFrame
        The DataFrame containing the data.
    col : str
        The name of the column to test.
    threshold : float
        The threshold to compare against.

    Returns:
    -------
    pandas.DataFrame
        A single-row frame with the columns ``col``, ``threshold``,
        ``normality_test_p_value_col``, ``is_normal``, ``t_stat``,
        ``p_value_ttest``, ``wilcoxon_stat``, ``p_value_wilcoxon`` and
        ``decision``. Statistics of the test that was not run stay ``None``.
    """
    import scipy.stats as stats

    significance = 0.05
    sample = df[col]

    # Key order defines the column order of the returned frame.
    record = {
        'col': col,
        'threshold': threshold,
        'normality_test_p_value_col': None,
        'is_normal': None,
        't_stat': None,
        'p_value_ttest': None,
        'wilcoxon_stat': None,
        'p_value_wilcoxon': None,
    }

    # Normality check drives the choice of test
    _, shapiro_p_value = stats.shapiro(sample)
    print(f'P-value for normality test on {col}: {shapiro_p_value}')
    record['normality_test_p_value_col'] = shapiro_p_value

    if shapiro_p_value > significance:
        print('The distribution is normally distributed.')
        record['is_normal'] = 'yes'

        outcome = stats.ttest_1samp(a=sample, popmean=threshold, alternative='greater')
        print(f"t-statistic: {outcome.statistic:.2f}")
        print(f"p-value: {outcome.pvalue:.3f}")
        record['t_stat'] = outcome.statistic
        record['p_value_ttest'] = outcome.pvalue

        if outcome.pvalue < significance:
            print(f'Reject the null hypothesis: The mean is significantly greater than {threshold}.')
            record['decision'] = 'Reject H0'
        else:
            print(f'Fail to reject the null hypothesis: The mean is not significantly greater than {threshold}.')
            record['decision'] = 'Fail to reject H0'
    else:
        print('The series is not normally distributed.')
        record['is_normal'] = 'no'

        wilcoxon_stat, p_value_wilcoxon = stats.wilcoxon(sample - threshold, alternative='greater')
        print(f'Wilcoxon statistic: {wilcoxon_stat}')
        print(f'P-value for the Wilcoxon signed-rank test: {p_value_wilcoxon}')
        record['wilcoxon_stat'] = wilcoxon_stat
        record['p_value_wilcoxon'] = p_value_wilcoxon

        if p_value_wilcoxon < significance:
            print(f'Reject the null hypothesis: The median is significantly greater than {threshold}.')
            record['decision'] = 'Reject H0'
        else:
            print(f'Fail to reject the null hypothesis: The median is not significantly greater than {threshold}.')
            record['decision'] = 'Fail to reject H0'

    # One-element lists give a single-row frame with per-column dtype inference.
    return pd.DataFrame({name: [value] for name, value in record.items()})



def two_sample_t_test(df, col1, col2):
    """
    Test one-sidedly whether ``col1`` tends to be greater than ``col2``.

    Both columns are checked with the Shapiro-Wilk test at the 5% level. When
    neither rejects normality, Levene's test selects between the pooled and the
    unequal-variance independent two-sample t-test; otherwise the Mann-Whitney U
    test is used. Intermediate results are printed.

    Parameters:
    ----------
    df : pandas.DataFrame
        The DataFrame containing the metrics.
    col1 : str
        The name of the column hypothesised to be greater.
    col2 : str
        The name of the column it is compared against.

    Returns:
    -------
    pandas.DataFrame
        A single-row frame with the columns ``col1``, ``col2``,
        ``normality_test_p_value_col1``, ``normality_test_p_value_col2``,
        ``is_normal``, ``p_value_equal_variance_levene_test``, ``t_stat``,
        ``p_value_ttest``, ``u_stat``, ``p_value_mannwhitney`` and ``decision``.
        Statistics of the tests that were not run stay ``None``.
    """
    from scipy import stats

    significance = 0.05
    first_sample = df[col1]
    second_sample = df[col2]

    # Key order defines the column order of the returned frame.
    record = {
        'col1': col1,
        'col2': col2,
        'normality_test_p_value_col1': None,
        'normality_test_p_value_col2': None,
        'is_normal': None,
        'p_value_equal_variance_levene_test': None,
        't_stat': None,
        'p_value_ttest': None,
        'u_stat': None,
        'p_value_mannwhitney': None,
    }

    # Normality of both samples drives the choice of test
    _, p_value_col1 = stats.shapiro(first_sample)
    _, p_value_col2 = stats.shapiro(second_sample)
    print(f'P-value for normality test on {col1}: {p_value_col1}')
    print(f'P-value for normality test on {col2}: {p_value_col2}')
    record['normality_test_p_value_col1'] = p_value_col1
    record['normality_test_p_value_col2'] = p_value_col2

    if p_value_col1 > significance and p_value_col2 > significance:
        print('Both groups are normally distributed.')
        record['is_normal'] = 'yes'

        # Levene's test decides between the pooled and the unequal-variance t-test
        _, p_value_var = stats.levene(first_sample, second_sample)
        print(f'P-value for equal variance test: {p_value_var}')
        record['p_value_equal_variance_levene_test'] = p_value_var

        t_stat, p_value_ttest = stats.ttest_ind(
            first_sample,
            second_sample,
            equal_var=bool(p_value_var > significance),
            alternative='greater',
        )
        print(f'T-statistic: {t_stat}')
        print(f'P-value for the t-test: {p_value_ttest}')
        record['t_stat'] = t_stat
        record['p_value_ttest'] = p_value_ttest

        if p_value_ttest < significance:
            print(f'Reject the null hypothesis: The mean of {col1} is significantly greater than the mean of {col2}.')
            record['decision'] = 'Reject H0'
        else:
            print(f'Fail to reject the null hypothesis: The mean of {col1} is not significantly greater than the mean of {col2}.')
            record['decision'] = 'Fail to reject H0'
    else:
        print('At least one of the groups is not normally distributed.')
        record['is_normal'] = 'no'

        u_stat, p_value_mannwhitney = stats.mannwhitneyu(first_sample, second_sample, alternative='greater')
        print(f'U-statistic: {u_stat}')
        print(f'P-value for the Mann-Whitney U test: {p_value_mannwhitney}')
        record['u_stat'] = u_stat
        record['p_value_mannwhitney'] = p_value_mannwhitney

        if p_value_mannwhitney < significance:
            print(f'Reject the null hypothesis: The median of {col1} is significantly greater than the median of {col2}.')
            record['decision'] = 'Reject H0'
        else:
            print(f'Fail to reject the null hypothesis: The median of {col1} is not significantly greater than the median of {col2}.')
            record['decision'] = 'Fail to reject H0'

    # One-element lists give a single-row frame with per-column dtype inference.
    return pd.DataFrame({name: [value] for name, value in record.items()})


def plot_metrics(df, ai_col, standard_col, metrics_name, bins=None):
    """
    Plot overlaid histograms of the AI and standard model's metrics and save them.

    Vertical lines mark each column's mean (solid) and median (dashed). The
    figure is written to
    ``EXPLORATIVE_DATA_ANALYSIS/{metrics_name}_distribution_plot.png``;
    ``plt.show()`` is not called.

    Parameters:
    ----------
    df : pandas.DataFrame
        The DataFrame containing the metrics.
    ai_col : str
        The name of the column with AI model's metrics.
    standard_col : str
        The name of the column with standard model's metrics.
    metrics_name : str
        The name of the metrics to plot; used for the title, the x-axis label
        and the output file name.
    bins : int or sequence, optional
        Histogram bins forwarded to ``plt.hist`` for both columns. Defaults to
        ``range(-100, 105, 5)``, the previously hard-coded bin edges. Values
        outside the bin edges are not drawn, so pass other bins for metrics
        that live on a different scale.
    """
    if bins is None:
        # Historical default: 5-wide bins covering [-100, 100]
        bins = range(-100, 105, 5)

    plt.figure(figsize=(20, 10))
    plt.hist(df[ai_col], bins=bins, alpha=0.5, label=ai_col)
    plt.hist(df[standard_col], bins=bins, alpha=0.5, label=standard_col)
    plt.axvline(df[ai_col].mean(), color='blue', linestyle='-', linewidth=2, label=f'{ai_col}-Mean')
    plt.axvline(df[ai_col].median(), color='blue', linestyle='--', linewidth=2, label=f'{ai_col}-Median')
    plt.axvline(df[standard_col].mean(), color='red', linestyle='-', linewidth=2, label=f'{standard_col}-Mean')
    plt.axvline(df[standard_col].median(), color='red', linestyle='--', linewidth=2, label=f'{standard_col}-Median')

    # Set the title and labels
    plt.title(f'Distribution of {metrics_name}')
    plt.xlabel(metrics_name)
    plt.ylabel('Frequency')

    # Rotate x-axis labels
    plt.xticks(rotation=45)

    # Add a legend
    plt.legend()

    # Save the plot to disk
    plt.tight_layout()
    plt.savefig(f'EXPLORATIVE_DATA_ANALYSIS/{metrics_name}_distribution_plot.png')



# Initialise empty result DataFrames

In [None]:
# Three independent, empty accumulators that the metric sections below prepend to.
all_stats, two_samples_result_df, one_sample_result_df = (pd.DataFrame() for _ in range(3))

# Sharpe ratio

In [None]:
# Cast the AI Sharpe ratio column to float64
back_testing_results['ai_algo-sharpe_ratio'] = back_testing_results['ai_algo-sharpe_ratio'].astype('float64')

In [None]:
back_testing_results.coin1.value_counts()

In [None]:
# Named `metric_stats` rather than `stats`, so the `scipy.stats` module imported
# under the name `stats` is not overwritten by a DataFrame.
metric_stats = describe(back_testing_results, 'ai_algo-sharpe_ratio', 'standard_algo-sharpe_ratio')
metric_stats
all_stats = pd.concat([metric_stats, all_stats], axis=0)

In [None]:
# Test both directions: AI > standard first, then standard > AI.
for first_col, second_col in (
    ('ai_algo-sharpe_ratio', 'standard_algo-sharpe_ratio'),
    ('standard_algo-sharpe_ratio', 'ai_algo-sharpe_ratio'),
):
    two_samples_result_tmp_df = two_sample_t_test(back_testing_results, first_col, second_col)
    two_samples_result_df = pd.concat([two_samples_result_tmp_df, two_samples_result_df], axis=0)

In [None]:
# For each algorithm: find the largest rejectable threshold, then record the test at it.
for position, metric_col in enumerate(('ai_algo-sharpe_ratio', 'standard_algo-sharpe_ratio')):
    if position > 0:
        print("####################################")
    threshold = find_max_rejection_threshold(back_testing_results, metric_col)
    one_sample_result_tmp_df = t_test(back_testing_results, metric_col, threshold)
    one_sample_result_df = pd.concat([one_sample_result_tmp_df, one_sample_result_df], axis=0)
one_sample_result_df

In [None]:
plot_metrics(back_testing_results, 'ai_algo-sharpe_ratio', 'standard_algo-sharpe_ratio','sharpe_ratio')

# Annualized return

In [None]:
# Named `metric_stats` rather than `stats`, so the `scipy.stats` module imported
# under the name `stats` is not overwritten by a DataFrame.
metric_stats = describe(back_testing_results, 'ai_algo-annualized_return', 'standard_algo-annualized_return')
metric_stats
all_stats = pd.concat([metric_stats, all_stats], axis=0)


In [None]:
# Test both directions: AI > standard first, then standard > AI.
for first_col, second_col in (
    ('ai_algo-annualized_return', 'standard_algo-annualized_return'),
    ('standard_algo-annualized_return', 'ai_algo-annualized_return'),
):
    two_samples_result_tmp_df = two_sample_t_test(back_testing_results, first_col, second_col)
    two_samples_result_df = pd.concat([two_samples_result_tmp_df, two_samples_result_df], axis=0)

In [None]:
# For each algorithm: find the largest rejectable threshold, then record the test at it.
for position, metric_col in enumerate(('ai_algo-annualized_return', 'standard_algo-annualized_return')):
    if position > 0:
        print("####################################")
    threshold = find_max_rejection_threshold(back_testing_results, metric_col)
    one_sample_result_tmp_df = t_test(back_testing_results, metric_col, threshold)
    one_sample_result_df = pd.concat([one_sample_result_tmp_df, one_sample_result_df], axis=0)

In [None]:
# The metric name must be 'annualized_return': it sets the plot title and the output
# file name, so reusing 'sharpe_ratio' here would overwrite the Sharpe ratio plot.
plot_metrics(back_testing_results, 'ai_algo-annualized_return', 'standard_algo-annualized_return', 'annualized_return')

# Annualized volatility

In [None]:
# Named `metric_stats` rather than `stats`, so the `scipy.stats` module imported
# under the name `stats` is not overwritten by a DataFrame.
metric_stats = describe(back_testing_results, 'ai_algo-annualized_volatility', 'standard_algo-annualized_volatility')
metric_stats
all_stats = pd.concat([metric_stats, all_stats], axis=0)


In [None]:
# Test both directions: AI > standard first, then standard > AI.
for first_col, second_col in (
    ('ai_algo-annualized_volatility', 'standard_algo-annualized_volatility'),
    ('standard_algo-annualized_volatility', 'ai_algo-annualized_volatility'),
):
    two_samples_result_tmp_df = two_sample_t_test(back_testing_results, first_col, second_col)
    two_samples_result_df = pd.concat([two_samples_result_tmp_df, two_samples_result_df], axis=0)

In [None]:
# For each algorithm: find the largest rejectable threshold (step 0.001), then record the test at it.
for position, metric_col in enumerate(('ai_algo-annualized_volatility', 'standard_algo-annualized_volatility')):
    if position > 0:
        print("####################################")
    threshold = find_max_rejection_threshold(back_testing_results, metric_col, pace=0.001)
    one_sample_result_tmp_df = t_test(back_testing_results, metric_col, threshold)
    one_sample_result_df = pd.concat([one_sample_result_tmp_df, one_sample_result_df], axis=0)

# Net PnL

In [None]:
# Named `metric_stats` rather than `stats`, so the `scipy.stats` module imported
# under the name `stats` is not overwritten by a DataFrame.
metric_stats = describe(back_testing_results, 'ai_algo-net_pnl', 'standard_algo-net_pnl')
metric_stats
all_stats = pd.concat([metric_stats, all_stats], axis=0)


In [None]:
# Test both directions: AI > standard first, then standard > AI.
for first_col, second_col in (
    ('ai_algo-net_pnl', 'standard_algo-net_pnl'),
    ('standard_algo-net_pnl', 'ai_algo-net_pnl'),
):
    two_samples_result_tmp_df = two_sample_t_test(back_testing_results, first_col, second_col)
    two_samples_result_df = pd.concat([two_samples_result_tmp_df, two_samples_result_df], axis=0)

In [None]:
# For each algorithm: find the largest rejectable threshold, then record the test at it.
for position, metric_col in enumerate(('ai_algo-net_pnl', 'standard_algo-net_pnl')):
    if position > 0:
        print("####################################")
    threshold = find_max_rejection_threshold(back_testing_results, metric_col)
    one_sample_result_tmp_df = t_test(back_testing_results, metric_col, threshold)
    one_sample_result_df = pd.concat([one_sample_result_tmp_df, one_sample_result_df], axis=0)


In [None]:
plot_metrics(back_testing_results, 'ai_algo-net_pnl', 'standard_algo-net_pnl','net_pnl')

# Number of trades

In [None]:
# Named `metric_stats` rather than `stats`, so the `scipy.stats` module imported
# under the name `stats` is not overwritten by a DataFrame.
metric_stats = describe(back_testing_results, 'ai_algo-num_trades', 'standard_algo-num_trades')
metric_stats
all_stats = pd.concat([metric_stats, all_stats], axis=0)


In [None]:
# Test both directions: AI > standard first, then standard > AI.
for first_col, second_col in (
    ('ai_algo-num_trades', 'standard_algo-num_trades'),
    ('standard_algo-num_trades', 'ai_algo-num_trades'),
):
    two_samples_result_tmp_df = two_sample_t_test(back_testing_results, first_col, second_col)
    two_samples_result_df = pd.concat([two_samples_result_tmp_df, two_samples_result_df], axis=0)

In [None]:
# For each algorithm: find the largest rejectable threshold, then record the test at it.
for position, metric_col in enumerate(('ai_algo-num_trades', 'standard_algo-num_trades')):
    if position > 0:
        print("####################################")
    threshold = find_max_rejection_threshold(back_testing_results, metric_col)
    one_sample_result_tmp_df = t_test(back_testing_results, metric_col, threshold)
    one_sample_result_df = pd.concat([one_sample_result_tmp_df, one_sample_result_df], axis=0)

In [None]:
plot_metrics(back_testing_results, 'ai_algo-num_trades','standard_algo-num_trades','number_of_trades')

# Fees

In [None]:
# Named `metric_stats` rather than `stats`, so the `scipy.stats` module imported
# under the name `stats` is not overwritten by a DataFrame.
metric_stats = describe(back_testing_results, 'ai_algo-fees', 'standard_algo-fees')
metric_stats
all_stats = pd.concat([metric_stats, all_stats], axis=0)


In [None]:
# Test both directions: AI > standard first, then standard > AI.
for first_col, second_col in (
    ('ai_algo-fees', 'standard_algo-fees'),
    ('standard_algo-fees', 'ai_algo-fees'),
):
    two_samples_result_tmp_df = two_sample_t_test(back_testing_results, first_col, second_col)
    two_samples_result_df = pd.concat([two_samples_result_tmp_df, two_samples_result_df], axis=0)

In [None]:
# For each algorithm: find the largest rejectable threshold, then record the test at it.
for position, metric_col in enumerate(('ai_algo-fees', 'standard_algo-fees')):
    if position > 0:
        print("####################################")
    threshold = find_max_rejection_threshold(back_testing_results, metric_col)
    one_sample_result_tmp_df = t_test(back_testing_results, metric_col, threshold)
    one_sample_result_df = pd.concat([one_sample_result_tmp_df, one_sample_result_df], axis=0)


In [None]:
plot_metrics(back_testing_results, 'ai_algo-fees', 'standard_algo-fees','fees')

# Average duration

In [None]:
# Named `metric_stats` rather than `stats`, so the `scipy.stats` module imported
# under the name `stats` is not overwritten by a DataFrame.
metric_stats = describe(back_testing_results, 'ai_algo-avg_trade_duration', 'standard_algo-avg_trade_duration')
metric_stats
all_stats = pd.concat([metric_stats, all_stats], axis=0)


In [None]:
# Test both directions: AI > standard first, then standard > AI.
for first_col, second_col in (
    ('ai_algo-avg_trade_duration', 'standard_algo-avg_trade_duration'),
    ('standard_algo-avg_trade_duration', 'ai_algo-avg_trade_duration'),
):
    two_samples_result_tmp_df = two_sample_t_test(back_testing_results, first_col, second_col)
    two_samples_result_df = pd.concat([two_samples_result_tmp_df, two_samples_result_df], axis=0)

In [None]:
# For each algorithm: find the largest rejectable threshold, then record the test at it.
for position, metric_col in enumerate(('ai_algo-avg_trade_duration', 'standard_algo-avg_trade_duration')):
    if position > 0:
        print("####################################")
    threshold = find_max_rejection_threshold(back_testing_results, metric_col)
    one_sample_result_tmp_df = t_test(back_testing_results, metric_col, threshold)
    one_sample_result_df = pd.concat([one_sample_result_tmp_df, one_sample_result_df], axis=0)

In [None]:
plot_metrics(back_testing_results, 'ai_algo-avg_trade_duration', 'standard_algo-avg_trade_duration', 'average_trade_duration')

# Max drawdown

In [None]:
# Named `metric_stats` rather than `stats`, so the `scipy.stats` module imported
# under the name `stats` is not overwritten by a DataFrame.
metric_stats = describe(back_testing_results, 'ai_algo-max_drawdown', 'standard_algo-max_drawdown')
metric_stats
all_stats = pd.concat([metric_stats, all_stats], axis=0)


In [None]:
from scipy import stats

# Test both directions: AI > standard first, then standard > AI.
for first_col, second_col in (
    ('ai_algo-max_drawdown', 'standard_algo-max_drawdown'),
    ('standard_algo-max_drawdown', 'ai_algo-max_drawdown'),
):
    two_samples_result_tmp_df = two_sample_t_test(back_testing_results, first_col, second_col)
    two_samples_result_df = pd.concat([two_samples_result_tmp_df, two_samples_result_df], axis=0)

In [None]:
back_testing_results['standard_algo-max_drawdown']

In [None]:
# For each algorithm: find the largest rejectable threshold (step 0.000001), then record the test at it.
for position, metric_col in enumerate(('ai_algo-max_drawdown', 'standard_algo-max_drawdown')):
    if position > 0:
        print("####################################")
    threshold = find_max_rejection_threshold(back_testing_results, metric_col, pace=0.000001)
    one_sample_result_tmp_df = t_test(back_testing_results, metric_col, threshold)
    one_sample_result_df = pd.concat([one_sample_result_tmp_df, one_sample_result_df], axis=0)

# Win ratio

In [None]:
# Named `metric_stats` rather than `stats`, so the `scipy.stats` module imported
# under the name `stats` is not overwritten by a DataFrame.
metric_stats = describe(back_testing_results, 'ai_algo-win_ratio', 'standard_algo-win_ratio')
metric_stats
all_stats = pd.concat([metric_stats, all_stats], axis=0)


In [None]:
from scipy import stats

# Test both directions: AI > standard first, then standard > AI.
for first_col, second_col in (
    ('ai_algo-win_ratio', 'standard_algo-win_ratio'),
    ('standard_algo-win_ratio', 'ai_algo-win_ratio'),
):
    two_samples_result_tmp_df = two_sample_t_test(back_testing_results, first_col, second_col)
    two_samples_result_df = pd.concat([two_samples_result_tmp_df, two_samples_result_df], axis=0)

In [None]:
# For each algorithm: find the largest rejectable threshold, then record the test at it.
for position, metric_col in enumerate(('ai_algo-win_ratio', 'standard_algo-win_ratio')):
    if position > 0:
        print("####################################")
    threshold = find_max_rejection_threshold(back_testing_results, metric_col)
    one_sample_result_tmp_df = t_test(back_testing_results, metric_col, threshold)
    one_sample_result_df = pd.concat([one_sample_result_tmp_df, one_sample_result_df], axis=0)

# Aggregated output

In [None]:
two_samples_result_df

In [None]:
# Column names look like "<algo>-<metric>": split them once and reuse the parts.
col1_parts = [name.split('-') for name in two_samples_result_df['col1']]
col2_parts = [name.split('-') for name in two_samples_result_df['col2']]
two_samples_result_df['metric'] = [parts[1] for parts in col1_parts]
two_samples_result_df['algo_type_col1'] = [parts[0] for parts in col1_parts]
two_samples_result_df['algo_type_col2'] = [parts[0] for parts in col2_parts]

# Each test is one-sided: Ha says col1's algorithm is greater, H0 is the complement.
two_samples_result_df['H0'] = [
    f"{second_algo}>={first_algo}"
    for first_algo, second_algo in zip(
        two_samples_result_df['algo_type_col1'], two_samples_result_df['algo_type_col2']
    )
]
two_samples_result_df['Ha'] = [
    f"{first_algo}>{second_algo}"
    for first_algo, second_algo in zip(
        two_samples_result_df['algo_type_col1'], two_samples_result_df['algo_type_col2']
    )
]

columns = [
    'metric', 'H0', 'Ha', 'col1', 'col2',
    'normality_test_p_value_col1', 'normality_test_p_value_col2', 'is_normal',
    'p_value_equal_variance_levene_test', 't_stat', 'p_value_ttest',
    'u_stat', 'p_value_mannwhitney', 'decision',
]
two_samples_result_df = two_samples_result_df[columns].sort_values(
    by=['metric', 'H0'], ascending=[True, True]
)

In [None]:
def _one_sample_null_hypothesis(row):
    # H0: the algorithm's metric is at most the tested threshold.
    return f"{row['algo_type_col']}<={row['threshold']}"


def _one_sample_alternative_hypothesis(row):
    # Ha: the algorithm's metric exceeds the tested threshold.
    return f"{row['algo_type_col']}>{row['threshold']}"


# Column names look like "<algo>-<metric>": split them once and reuse the parts.
one_sample_col_parts = [name.split('-') for name in one_sample_result_df['col']]
one_sample_result_df['metric'] = [parts[1] for parts in one_sample_col_parts]
one_sample_result_df['algo_type_col'] = [parts[0] for parts in one_sample_col_parts]
one_sample_result_df['H0'] = one_sample_result_df.apply(_one_sample_null_hypothesis, axis=1)
one_sample_result_df['Ha'] = one_sample_result_df.apply(_one_sample_alternative_hypothesis, axis=1)

columns = [
    'metric', 'threshold', 'H0', 'Ha', 'normality_test_p_value_col', 'is_normal',
    't_stat', 'p_value_ttest', 'wilcoxon_stat', 'p_value_wilcoxon', 'decision',
]
one_sample_result_df = one_sample_result_df[columns].sort_values(
    by=['metric', 'H0'], ascending=[True, True]
)

In [None]:
# The index holds "<algo>-<metric>" labels; move it to a column and split it.
all_stats = all_stats.reset_index()
index_parts = [label.split('-') for label in all_stats['index']]
all_stats['metric'] = [parts[1] for parts in index_parts]
all_stats['algo_type_col'] = [parts[0] for parts in index_parts]
columns = ['metric', 'algo_type_col', 'count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']
all_stats = all_stats[columns]

In [None]:
# Write the three result tables to CSV without their index.
for result_frame, csv_path in (
    (all_stats, "EXPLORATIVE_DATA_ANALYSIS/all_stats.csv"),
    (two_samples_result_df, "EXPLORATIVE_DATA_ANALYSIS/two_samples_result.csv"),
    (one_sample_result_df, "EXPLORATIVE_DATA_ANALYSIS/one_sample_result.csv"),
):
    result_frame.to_csv(csv_path, index=False)