In [15]:
import pandas as pd
import numpy as np
import talib as ta

import seaborn as sns
import matplotlib.pyplot as plt


# CSV file holding the model output that the rest of the notebook analyses.
PATH_READ = 'Classification_result_ET_Binary.csv'

# Read the file indexed by its 'Timestamp' column and keep only the
# true-value column and the model's prediction column.
df = pd.read_csv(PATH_READ, index_col='Timestamp').loc[
    :, ['Target_Class_Avg_1min', 'prediction_label']
]

print(df)

                     Target_Class_Avg_1min  prediction_label
Timestamp                                                   
2023-07-07 14:42:00                     -1                -1
2023-07-07 14:43:00                      1                -1
2023-07-07 14:44:00                      1                 1
2023-07-07 14:45:00                      1                 1
2023-07-07 14:46:00                     -1                 1
...                                    ...               ...
2023-07-20 14:54:00                     -1                -1
2023-07-20 14:55:00                      1                 1
2023-07-20 14:56:00                     -1                 1
2023-07-20 14:57:00                     -1                -1
2023-07-20 15:00:00                      1                -1

[2160 rows x 2 columns]


In [7]:
# Smallest and largest prediction, printed to show the range of the model output.
min_value, max_value = df['prediction_label'].min(), df['prediction_label'].max()

print('Predict min: ' + str(min_value), 'Predict max: ' + str(max_value), sep='\n')

Predict min: -0.0051898708715452
Predict max: 0.0092796870188612


In [8]:
def quantiles(df, num_interval):
    """Sort rows by prediction and tag each row with its quantile bucket.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``'prediction_label'`` column. It is not modified.
    num_interval : int
        Number of quantile buckets, passed to ``pd.qcut`` as ``q``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` sorted ascending by ``'prediction_label'`` with an
        extra ``'Interval_Marker'`` column holding the integer bucket code
        returned by ``pd.qcut(..., labels=False)``.
    """
    return df.sort_values(by='prediction_label').assign(
        # labels=False makes qcut return integer bucket codes instead of intervals.
        Interval_Marker=lambda ordered: pd.qcut(
            ordered['prediction_label'], q=num_interval, labels=False
        )
    )

In [9]:
# Split the predictions into 20 quantile buckets (each bucket covers 5% of the rows
# when the bucket edges are distinct).
df_sorted = quantiles(df, num_interval=20)
print(df_sorted)


                     Target_Avg_1min  prediction_label  Interval_Marker
Timestamp                                                              
2023-07-19 09:34:00        -0.008285         -0.005190                0
2023-07-18 09:31:00        -0.005017         -0.004880                0
2023-07-14 09:32:00         0.002298         -0.004407                0
2023-07-14 09:36:00        -0.002159         -0.004390                0
2023-07-19 10:02:00        -0.005518         -0.003882                0
...                              ...               ...              ...
2023-07-17 13:06:00         0.005087          0.007877               19
2023-07-17 15:00:00        -0.001014          0.008443               19
2023-07-19 09:50:00         0.004894          0.008744               19
2023-07-20 09:26:00        -0.002677          0.008929               19
2023-07-12 09:26:00         0.002692          0.009280               19

[2160 rows x 3 columns]


In [10]:
def quant_analysis_list(df, num_interval, target):
    """Summarise prediction quality separately for each quantile interval.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the ``target`` column, a ``'prediction_label'``
        column and an integer ``'Interval_Marker'`` column.
    num_interval : int
        Intervals ``0 .. num_interval - 1`` are reported, one row each.
        An interval with no rows yields NaN statistics.
    target : str
        Name of the column holding the true values.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'Interval'`` with the columns
        ``'Percent in the Same Direction'``, ``'Correlation Pearson'``,
        ``'Correlation Spearman'``, ``'True Mean'`` and ``'Predicted Mean'``.
    """
    columns = [
        'Interval',
        'Percent in the Same Direction',
        'Correlation Pearson',
        'Correlation Spearman',
        'True Mean',
        'Predicted Mean',
    ]

    records = []
    for interval in range(num_interval):
        bucket = df[df['Interval_Marker'] == interval]
        actual = bucket[target]
        predicted = bucket['prediction_label']

        records.append({
            'Interval': interval,
            # A positive product means truth and prediction share the same sign;
            # a zero on either side counts as a miss.
            'Percent in the Same Direction': (actual * predicted > 0).mean() * 100,
            'Correlation Pearson': actual.corr(predicted),
            'Correlation Spearman': actual.corr(predicted, method='spearman'),
            'True Mean': actual.mean(),
            # Fix: average the predictions here, not the target column again.
            'Predicted Mean': predicted.mean(),
        })

    # Passing `columns` keeps the column order fixed and also handles num_interval == 0.
    return pd.DataFrame(records, columns=columns).set_index('Interval')

In [11]:
# Per-interval summary for the 20 quantile buckets.
# NOTE(review): the load cell in this notebook selects 'Target_Class_Avg_1min', not
# 'Target_Avg_1min' — confirm which dataset df_sorted was built from before re-running.
print(quant_analysis_list(df_sorted, num_interval=20, target='Target_Avg_1min'))

          Percent in the Same Direction  Correlation Pearson  \
Interval                                                       
0                             88.888889             0.344777   
1                             78.703704             0.010374   
2                             83.333333             0.176896   
3                             72.222222            -0.051855   
4                             69.444444             0.011772   
5                             72.222222            -0.056912   
6                             65.740741             0.010040   
7                             67.592593            -0.015666   
8                             64.814815            -0.100602   
9                             56.481481             0.050784   
10                            54.629630             0.256286   
11                            44.444444             0.143205   
12                            55.555556             0.057716   
13                            61.111111 