### SET UP

In [1]:
import numpy as np
import pandas as pd
from data.DataManager import DataManager
from data.DataGenerator import DataGenerator
from data.DataCollectors import HistoricalDataCollector, RealTimeDataCollector, HistoricalDataCollectorParquet
from main.RandomnessAnalysis import RandomnessAnalysis
from main.PredictableDayAnalysis import PredictableDayAnalysis
from utils.Analysis import get_assets_properties, localization_predictable_intervals
from utils.MultiTester import MultiTester
from utils.VisualizationTools import plot_block_frequencies
from utils.Analysis import intervals_analysis
from utils.VisualizationTools import plot_test

In [None]:
asset_pairs = ['BTCUSDT','ETHUSDT','SOLUSDT',
               'BNBUSDT', 'AVAXUSDT', 'UNIUSDT',
               'LINKUSDT', 'AXSUSDT', 'RENDERUSDT']
symbols = {
            0: [(-np.inf, 0)
                , (False, False)],
            1: [(0, np.inf), (False, False)]
        }
s = 2

In [3]:
year, month, day = 2025, 2, None

In [4]:
historical_collector = HistoricalDataCollectorParquet(pairs=asset_pairs, year=year, month=month, day=day)
historical_collector.collect()

[SYSTEM] Processing BTCUSDT...
[SYSTEM] Parquet data already available for BTCUSDT → data/raw_data_parquet\BTCUSDT-trades-2025-02.parquet → Skipping download.
[SYSTEM] Processing ETHUSDT...
[SYSTEM] Parquet data already available for ETHUSDT → data/raw_data_parquet\ETHUSDT-trades-2025-02.parquet → Skipping download.
[SYSTEM] Processing UNIUSDT...
[SYSTEM] Parquet data already available for UNIUSDT → data/raw_data_parquet\UNIUSDT-trades-2025-02.parquet → Skipping download.


In [8]:
import pandas as pd

pair = "BTCUSDT"
data_manager = DataManager(["BTCUSDT"], symbols, year=year, month=month, day=day, aggregation_level=1)
blocks = data_manager.block_constructor(block_size=2, overlapping=True)
# 1. Charger directement le parquet "raw_data_parquet"
parquet_file = f"data/raw_data_parquet/{pair}-trades-{year}-{month:02d}-{day:02d}.parquet"
df_parquet = pd.read_parquet(parquet_file)
print(f"[INFO] Fichier parquet brut : {parquet_file}")
print(f"[INFO] Nombre de lignes dans le parquet : {len(df_parquet)}")
print(f"[INFO] Colonnes : {df_parquet.columns.tolist()}")
print(df_parquet.head())

# 2. Récupérer le DataFrame préprocessé par DataManager
df_preprocessed = data_manager.datasets[pair]
print("\n[INFO] DataManager dataset :")
print(f"[INFO] Nombre de lignes après preprocessing : {len(df_preprocessed)}")
print(f"[INFO] Colonnes : {df_preprocessed.columns.tolist()}")
print(df_preprocessed.head())

# 3. Récupérer les blocs
blocks = data_manager.block_constructor(block_size=2, overlapping=True)
blocks_btc = blocks[pair]
print("\n[INFO] Blocs :")
print(f"[INFO] Nombre total de blocs : {len(blocks_btc)} (should be n_rows - block_size + 1)")
print(f"[INFO] Forme blocs : {blocks_btc.shape}")
print(blocks_btc.head(10))  # afficher 10 blocs


[DataManager] Loading parquet file: data/raw_data_parquet\BTCUSDT-trades-2025-02-01.parquet
[INFO] Fichier parquet brut : data/raw_data_parquet/BTCUSDT-trades-2025-02-01.parquet
[INFO] Nombre de lignes dans le parquet : 2588786
[INFO] Colonnes : ['trade_id', 'price', 'volume', 'quote_qty', 'timestamp', 'is_buyer_maker', 'is_best_match']
     trade_id      price   volume  quote_qty         timestamp  \
0  4495881901  102429.56  0.00005   5.121478  1738368000182381   
1  4495881902  102429.56  0.00006   6.145774  1738368000182381   
2  4495881903  102429.56  0.00006   6.145774  1738368000302865   
3  4495881904  102429.56  0.00006   6.145774  1738368000302865   
4  4495881905  102429.56  0.00012  12.291547  1738368000302865   

   is_buyer_maker  is_best_match  
0           False           True  
1           False           True  
2           False           True  
3           False           True  
4           False           True  

[INFO] DataManager dataset :
[INFO] Nombre de lignes 

### Efficient vs Inefficient days

#### BTC

In [6]:
#Nouvelle version : 
data_manager = DataManager(["BTCUSDT"], symbols, year=year, month=month, day=day, aggregation_level=1)
blocks = data_manager.block_constructor(block_size=2, overlapping=True)
analysis = PredictableDayAnalysis(pair="BTCUSDT", data_manager=data_manager)
analysis.analyze_days(block_size=2, n_jobs=8)

analysis.efficient_df

[DataManager] Loading parquet file: data/raw_data_parquet\BTCUSDT-trades-2025-02.parquet
[SYSTEM] Starting analysis → 28 days to process...


[BTCUSDT] Analyzing days: 100%|██████████| 28/28 [22:40<00:00, 48.58s/it] 

[SYSTEM] Analysis completed : 28 days analyzed → 0 efficient, 28 inefficient.





In [7]:
analysis.inefficient_df

Unnamed: 0,Jump fraction,Autocorrelation,Autocorrelation of squared returns,Student distribution degree of freedom,Student distribution mean,Student distribution standard deviation,Returns mean,Fractions of zero-returns,KL Divergence statistics,Empirical quantile,Hypothesis
0,0.009263,-0.036351,0.200766,1.988286,-1.284749e-08,8.283958e-07,-1.480791e-09,0.0,66234.35,6.634897,True
1,0.009993,-0.12635,0.306825,0.543643,-1.157639e-08,1.904304e-07,-3.173761e-08,0.0,82231.62,6.634897,True
2,0.00861,-0.279057,0.314318,1.988331,-8.971039e-09,1.152788e-06,3.268467e-10,0.0,187658.0,6.634897,True
3,0.007354,-0.104142,0.203054,1.988306,-3.746196e-08,1.492806e-06,-9.477338e-09,0.0,805567.0,6.634897,True
4,0.008015,-0.189714,0.389228,1.988325,5.129374e-10,1.574108e-06,-3.826893e-10,0.0,786710.4,6.634897,True
5,0.007836,-0.120107,0.167781,1.988297,-1.632158e-08,1.32749e-06,1.171574e-08,0.0,416054.8,6.634897,True
6,0.007812,-0.074027,0.250404,0.527825,-7.215345e-09,3.215333e-07,-5.093478e-10,0.0,722603.4,6.634897,True
7,0.008217,-0.121779,0.410299,1.988287,-1.25151e-08,1.099314e-06,-2.233954e-08,0.0,314663.5,6.634897,True
8,0.010853,-0.046108,0.417502,0.562942,-4.823422e-09,1.929159e-07,2.165876e-09,0.0,41906.63,6.634897,True
9,0.01121,-0.093456,0.425607,0.556852,-1.533488e-08,1.982305e-07,-4.727323e-08,0.0,46118.23,6.634897,True


#### ETH

In [None]:
data_manager = DataManager(["ETHUSDT"], symbols, year=year, month=month, aggregation_level=1)
blocks = data_manager.block_constructor(block_size=2, overlapping=True)
analysis = PredictableDayAnalysis(pair="ETHUSDT",data_manager=data_manager)
analysis.analyze_days(block_size=2, n_jobs=8)
analysis.efficient_df

[DataManager] Loading parquet file: data/raw_data_parquet\ETHUSDT-trades-2025-02.parquet
[SYSTEM] Starting analysis → 28 days to process...


[ETHUSDT] Analyzing days:  14%|█▍        | 4/28 [05:08<28:38, 71.61s/it] 

In [None]:
analysis.inefficient_df

### Predictable intervals locations (BTC)

In [None]:
df = localization_predictable_intervals(data_manager, "BTCUSDT", test='NP Statistic')
df.head(10)

### Aggregation level 1

In [None]:
data_manager = DataManager(asset_pairs, symbols, year=year, month=month, aggregation_level=1)

#### Non overlapping

In [None]:
blocks_size_2 = data_manager.block_constructor(block_size=2, overlapping=False)
blocks_size_3 = data_manager.block_constructor(block_size=3, overlapping=False)
blocks_size_5 = data_manager.block_constructor(block_size=5, overlapping=False)

##### Block size 2

In [None]:
blocks_btc = blocks_size_2['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_entropy = analyser.entropy_bias_test()
test_entropy

In [None]:
blocks_eth = blocks_size_2['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_entropy = analyser.entropy_bias_test()
test_entropy

In [None]:
blocks_uni = blocks_size_2['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_entropy = analyser.entropy_bias_test()
test_entropy

##### Block size 3

In [None]:
blocks_btc = blocks_size_3['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_3['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_3['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

##### Block size 5

In [None]:
blocks_btc = blocks_size_5['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_5['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_5['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

#### Overlapping

In [None]:
blocks_size_2 = data_manager.block_constructor(block_size=2, overlapping=True)
blocks_size_3 = data_manager.block_constructor(block_size=3, overlapping=True)
blocks_size_5 = data_manager.block_constructor(block_size=5, overlapping=True)

##### Block size 2

In [None]:
blocks_btc = blocks_size_2['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_divergence = analyser.KL_divergence_test()
test_divergence

In [None]:
blocks_eth = blocks_size_2['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_divergence = analyser.KL_divergence_test()
test_divergence

In [None]:
blocks_uni = blocks_size_2['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_divergence = analyser.KL_divergence_test()
test_divergence

##### Block size 3

In [None]:
blocks_btc = blocks_size_3['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_3['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_3['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

##### Block size 5

In [None]:
blocks_btc = blocks_size_5['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_5['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_5['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

### Aggregation level 5

In [None]:
data_manager = DataManager(asset_pairs, symbols, year=year, month=month, aggregation_level=5)

#### Non overlapping

In [None]:
blocks_size_2 = data_manager.block_constructor(block_size=2, overlapping=False)
blocks_size_3 = data_manager.block_constructor(block_size=3, overlapping=False)
blocks_size_5 = data_manager.block_constructor(block_size=5, overlapping=False)

##### Block size 2

In [None]:
blocks_btc = blocks_size_2['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_entropy = analyser.entropy_bias_test()
test_entropy

In [None]:
blocks_eth = blocks_size_2['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_entropy = analyser.entropy_bias_test()
test_entropy

In [None]:
blocks_uni = blocks_size_2['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_entropy = analyser.entropy_bias_test()
test_entropy

##### Block size 3

In [None]:
blocks_btc = blocks_size_3['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_3['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_3['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

##### Block size 5

In [None]:
blocks_btc = blocks_size_5['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_5['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_5['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

#### Overlapping

In [None]:
blocks_size_2 = data_manager.block_constructor(block_size=2, overlapping=True)
blocks_size_3 = data_manager.block_constructor(block_size=3, overlapping=True)
blocks_size_5 = data_manager.block_constructor(block_size=5, overlapping=True)

##### Block size 2

In [None]:
blocks_btc = blocks_size_2['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_divergence = analyser.KL_divergence_test()
test_divergence

In [None]:
blocks_eth = blocks_size_2['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_divergence = analyser.KL_divergence_test()
test_divergence

In [None]:
blocks_uni = blocks_size_2['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_divergence = analyser.KL_divergence_test()
test_divergence

##### Block size 3

In [None]:
blocks_btc = blocks_size_3['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_3['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_3['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

##### Block size 5

In [None]:
blocks_btc = blocks_size_5['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_5['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_5['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

### Aggregation level 20

In [None]:
data_manager = DataManager(asset_pairs, symbols, year=year, month=month, aggregation_level=20)

#### Non overlapping

In [None]:
blocks_size_2 = data_manager.block_constructor(block_size=2, overlapping=False)
blocks_size_3 = data_manager.block_constructor(block_size=3, overlapping=False)
blocks_size_5 = data_manager.block_constructor(block_size=5, overlapping=False)

##### Block size 2

In [None]:
blocks_btc = blocks_size_2['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_entropy = analyser.entropy_bias_test()
test_entropy

In [None]:
blocks_eth = blocks_size_2['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_entropy = analyser.entropy_bias_test()
test_entropy

In [None]:
blocks_uni = blocks_size_2['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_entropy = analyser.entropy_bias_test()
test_entropy

##### Block size 3

In [None]:
blocks_btc = blocks_size_3['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_3['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_3['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

##### Block size 5

In [None]:
blocks_btc = blocks_size_5['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_5['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_5['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

#### Overlapping

In [None]:
blocks_size_2 = data_manager.block_constructor(block_size=2, overlapping=True)
blocks_size_3 = data_manager.block_constructor(block_size=3, overlapping=True)
blocks_size_5 = data_manager.block_constructor(block_size=5, overlapping=True)

##### Block size 2

In [None]:
blocks_btc = blocks_size_2['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_divergence = analyser.KL_divergence_test()
test_divergence

In [None]:
blocks_eth = blocks_size_2['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_divergence = analyser.KL_divergence_test()
test_divergence

In [None]:
blocks_uni = blocks_size_2['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_divergence = analyser.KL_divergence_test()
test_divergence

##### Block size 3

In [None]:
blocks_btc = blocks_size_3['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_3['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_3['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

##### Block size 5

In [None]:
blocks_btc = blocks_size_5['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_5['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_5['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

### Aggregation level 50

In [None]:
data_manager = DataManager(asset_pairs, symbols, year=year, month=month, aggregation_level=50)

#### Non overlapping

In [None]:
blocks_size_2 = data_manager.block_constructor(block_size=2, overlapping=False)
blocks_size_3 = data_manager.block_constructor(block_size=3, overlapping=False)
blocks_size_5 = data_manager.block_constructor(block_size=5, overlapping=False)

##### Block size 2

In [None]:
blocks_btc = blocks_size_2['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_entropy = analyser.entropy_bias_test()
test_entropy

In [None]:
blocks_eth = blocks_size_2['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_entropy = analyser.entropy_bias_test()
test_entropy

In [None]:
blocks_uni = blocks_size_2['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_entropy = analyser.entropy_bias_test()
test_entropy

##### Block size 3

In [None]:
blocks_btc = blocks_size_3['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_3['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_3['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

##### Block size 5

In [None]:
blocks_btc = blocks_size_5['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_5['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_5['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

#### Overlapping

In [None]:
blocks_size_2 = data_manager.block_constructor(block_size=2, overlapping=False)
blocks_size_3 = data_manager.block_constructor(block_size=3, overlapping=False)
blocks_size_5 = data_manager.block_constructor(block_size=5, overlapping=False)

##### Block size 2

In [None]:
blocks_btc = blocks_size_2['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_divergence = analyser.KL_divergence_test()
test_divergence

In [None]:
blocks_eth = blocks_size_2['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_divergence = analyser.KL_divergence_test()
test_divergence

In [None]:
blocks_uni = blocks_size_2['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
test_divergence = analyser.KL_divergence_test()
test_divergence

##### Block size 3

In [None]:
blocks_btc = blocks_size_3['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_3['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_3['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

##### Block size 5

In [None]:
blocks_btc = blocks_size_5['BTCUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_btc, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_eth = blocks_size_5['ETHUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_eth, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

In [None]:
blocks_uni = blocks_size_5['UNIUSDT']
analyser = RandomnessAnalysis(blocks_df=blocks_uni, s=s)
frequencies_df = analyser.compute_blocks_frequencies()
plot_block_frequencies(frequencies_df)

### MultiTester BTC

In [None]:
multi_tester = MultiTester(asset='BTCUSDT',symbols=symbols,overlapping=False)
df_test_block = multi_tester.test_by_block_size(test='Entropy Bias',
                                    max_block_size=15,
                                    year=year,
                                    month=month,
                                    aggregation_level=5)
df_test_block

In [None]:
plot_test(x_values=df_test_block.index, 
          y1_values=df_test_block['Test statistic'].values,
          y2_values=df_test_block['Quantile 99'].values,
          test='Entropy Bias',
          x_label='Block size',
          pair='BTCUSDT')

In [None]:
multi_tester = MultiTester(asset='BTCUSDT',symbols=symbols,overlapping=True)
df_test_agg = multi_tester.test_by_aggregation_level(test='NP Statistic',
                                           max_aggregation_level=50,
                                           year=year,
                                           month=month,
                                           block_size=2)
df_test_agg

In [None]:
plot_test(x_values=df_test_agg.index, 
          y1_values=df_test_agg['Test statistic'].values,
          y2_values=df_test_agg['Quantile 99'].values,
          test='NP Statistic',
          x_label='Aggregation level',
          pair='BTCUSDT')

In [None]:
btc_multi_tester = MultiTester(asset='BTCUSDT',symbols=symbols,overlapping=False)
btc_multi_tester.plot_3D_test_result(asset='BTCUSDT',
                                     test='Entropy Bias',
                                     max_block_size=15,
                                     year=year,month=month,
                                     max_aggregation_level=50)

### MultiTester ETH

In [None]:
multi_tester = MultiTester(asset='ETHUSDT',symbols=symbols,overlapping=False)
df_test_block = multi_tester.test_by_block_size(test='Entropy Bias',
                                    max_block_size=15,
                                    year=year,
                                    month=month,
                                    aggregation_level=5)
df_test_block

In [None]:
plot_test(x_values=df_test_block.index, 
          y1_values=df_test_block['Test statistic'].values,
          y2_values=df_test_block['Quantile 99'].values,
          test='Entropy Bias',
          x_label='Block size',
          pair='BTCUSDT')

In [None]:
multi_tester = MultiTester(asset='BTCUSDT',symbols=symbols,overlapping=True)
df_test_agg = multi_tester.test_by_aggregation_level(test='NP Statistic',
                                           max_aggregation_level=50,
                                           year=year,
                                           month=month,
                                           block_size=2)
df_test_agg

In [None]:
plot_test(x_values=df_test_agg.index, 
          y1_values=df_test_agg['Test statistic'].values,
          y2_values=df_test_agg['Quantile 99'].values,
          test='NP Statistic',
          x_label='Aggregation level',
          pair='BTCUSDT')

### MultiTester UNI

In [None]:
multi_tester = MultiTester(asset='UNIUSDT',symbols=symbols,overlapping=False)
df_test_block = multi_tester.test_by_block_size(test='Entropy Bias',
                                    max_block_size=15,
                                    year=year,
                                    month=month,
                                    aggregation_level=5)
df_test_block

In [None]:
plot_test(x_values=df_test_block.index, 
          y1_values=df_test_block['Test statistic'].values,
          y2_values=df_test_block['Quantile 99'].values,
          test='Entropy Bias',
          x_label='Block size',
          pair='BTCUSDT')

In [None]:
multi_tester = MultiTester(asset='BTCUSDT',symbols=symbols,overlapping=True)
df_test_agg = multi_tester.test_by_aggregation_level(test='NP Statistic',
                                           max_aggregation_level=50,
                                           year=year,
                                           month=month,
                                           block_size=2)
df_test_agg

In [None]:
plot_test(x_values=df_test_agg.index, 
          y1_values=df_test_agg['Test statistic'].values,
          y2_values=df_test_agg['Quantile 99'].values,
          test='NP Statistic',
          x_label='Aggregation level',
          pair='BTCUSDT')

### Fraction of predictable days

In [None]:
asset_pairs = ['BTCUSDT','ETHUSDT','SOLUSDT',
               'BNBUSDT', 'AVAXUSDT', 'UNIUSDT',
               'LINKUSDT', 'AXSUSDT', 'RENDERUSDT']

In [None]:
intervals_analysis(pairs=asset_pairs,
                   symbols=symbols,
                   max_aggregation_level=50,
                   year=year,
                   month=[month])

In [None]:
intervals_analysis(pairs=asset_pairs,
                   symbols=symbols,
                   max_aggregation_level=50,
                   year=year,
                   month=[month],
                   test = 'NP Statistic')

In [None]:
historical_collector = HistoricalDataCollector(pairs=asset_pairs, year=2025, month=2)
historical_collector.collect()

### Assets properties

In [None]:
data_manager = DataManager(asset_pairs, symbols, year=year, month=month, aggregation_level=1)
df_prop = get_assets_properties(asset_pairs, s=2, year=year, month=month)
df_prop