### Load dataframe

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from datetime import datetime

# Read the raw tick CSV and discard every row that contains a missing value.
df = pd.read_csv("/Users/aleksandr/Desktop/Meta_Test.csv").dropna()

In [1]:
import pandas as pd
# Scratch sample: reload the raw ticks, drop incomplete rows, then keep the
# [2000:2500] row slice of what remains (500 rows per the recorded output).
aaa = pd.read_csv("/Users/aleksandr/Desktop/Meta_Test.csv").dropna()[2000:2500]
aaa.shape

(500, 4)

In [2]:
from volatility import estimate_tick_volatility

# Estimate tick-level volatility on the sample with the wavelet method, then
# tidy the result: drop the 'return' and 'SYMBOL' columns and rename the
# remaining ones to title case.
# The calls are chained and non-mutating (no `inplace=True`), so the frame
# returned by `estimate_tick_volatility` is never modified in place; `aaa`
# ends up bound to the same final content as before.
aaa = (
    estimate_tick_volatility(aaa, method='wavelet')
    .drop(columns=['return', 'SYMBOL'])
    .rename(columns={
        'wavelet_vol': 'Volatility',
        'TIMESTAMP': 'Timestamp',
        'VALUE': 'Value',
        'VOLUME': 'Volume',
    })
)
aaa.head()

Estimating advanced tick-level volatility for 500 ticks...
Computing wavelet-based volatility for META.O...
Completed advanced tick-level volatility estimation


Unnamed: 0,Timestamp,Value,Volume,Volatility
2000,2025-01-30T00:38:36.174Z,690.7747,99.0,0.000234
2001,2025-01-30T00:38:36.549Z,690.52,10.0,0.000234
2002,2025-01-30T00:38:37.066Z,690.1276,100.0,0.000233
2003,2025-01-30T00:38:38.121Z,690.49,66.0,0.000233
2004,2025-01-30T00:38:38.121Z,690.75,24.0,0.000232


### Initial cleaning

In [2]:
from clean import preprocess_tick_data

# Run the project's tick pre-processing. It returns three values; only the
# cleaned frame is used below, the other two are kept for inspection.
df_clean, df_diagnostics, outlier_counter = preprocess_tick_data(df)

# Continue with the cleaned ticks, without their VOLATILITY column.
df = df_clean.drop(columns="VOLATILITY")

Starting preprocessing with 570771 rows
After filtering trading hours: 282810 rows
After cleaning outliers: 282301 rows
Final clean dataset: 278585 rows

Outlier counts by detection method:
  zscore: 64
  extreme_deviation: 69
  isolated_point: 390
  price_reversal: 93
  timestamp_group: 34
  price_velocity: 3703
  suspicious_cluster: 52
  wavelet_outlier: 24


### Volatility estimation

In [3]:
from volatility import estimate_tick_volatility

# Estimate tick-level volatility on the cleaned ticks using the wavelet method.
df = estimate_tick_volatility(df, method="wavelet")

Estimating advanced tick-level volatility for 278585 ticks...
Computing wavelet-based volatility for META.O...
Completed advanced tick-level volatility estimation


In [4]:
# Tidy the volatility output: drop the 'return' and 'SYMBOL' columns and
# rename the remaining ones to title case.
# Written as a non-mutating chain with reassignment (no `inplace=True`), so
# the previous frame object is left untouched; `df` ends up with the same
# final content as before.
df = (
    df
    .drop(columns=['return', 'SYMBOL'])
    .rename(columns={
        'wavelet_vol': 'Volatility',
        'TIMESTAMP': 'Timestamp',
        'VALUE': 'Value',
        'VOLUME': 'Volume',
    })
)
df.head()

Unnamed: 0,Timestamp,Value,Volume,Volatility
0,2025-01-30 09:30:00.740000+00:00,694.24,13.0,0.00026
1,2025-01-30 09:30:00.740000+00:00,694.17,15.0,0.00026
2,2025-01-30 09:30:00.740000+00:00,694.17,15.0,0.000261
3,2025-01-30 09:30:00.740000+00:00,694.11,8.0,0.000261
4,2025-01-30 09:30:00.740000+00:00,694.1,249.0,0.000261


### Encoder-Only Transformer with Contrastive Loss

In [6]:
import os
import pandas as pd
from datetime import datetime
from CronusV1.Feature_engineering.feature_model_v1 import process_market_data

# Input: a 500-row slice ([1000:1500]) of the prepared data file.
df = pd.read_csv("/Users/aleksandr/Desktop/my_data.csv")[1000:1500]

# Directory the saved model is loaded from (and where features are written,
# per the recorded log output).
model_dir = '/Users/aleksandr/code/scripts/CronusV1/Feature_engineering/saved_models'

# Run feature extraction with the existing model (retrain=False).
features_df, model = process_market_data(
    df=df,
    model_dir=model_dir,
    # training controls
    retrain=False,
    num_epochs=10,
    grad_clip_norm=1.0,
    temperature=0.5,
    # model / windowing settings
    context_length=20,
    num_attention_heads=8,
    num_encoder_layers=3,
    causal=True,
)

print("\nProcess completed successfully!")

Model will be loaded from: /Users/aleksandr/code/scripts/CronusV1/Feature_engineering/saved_models
Using causal mode. (Suitable for real-time applications)
Setting up feature extractor...
Extracting microstructure features...
Extracted 20 microstructure features
Preparing data tensors...
Data shapes - Values: torch.Size([481, 20, 20]), Time: torch.Size([481, 20, 8])
Loading pre-trained model...
Loading model from /Users/aleksandr/code/scripts/CronusV1/Feature_engineering/saved_models/regime_model_20250409_2137_causal.pt
Model loaded from /Users/aleksandr/code/scripts/CronusV1/Feature_engineering/saved_models/regime_model_20250409_2137_causal.pt
Extracting features...
Extracted features shape: (481, 16)
Features saved to /Users/aleksandr/code/scripts/CronusV1/Feature_engineering/saved_models/regime_features.csv

Process completed successfully!


In [8]:
# Preview the first rows of the feature frame returned by process_market_data.
features_df.head()

Unnamed: 0,Timestamp,Value,Volume,Volatility,regime_feature_1,regime_feature_2,regime_feature_3,regime_feature_4,regime_feature_5,regime_feature_6,regime_feature_7,regime_feature_8,regime_feature_9,regime_feature_10,regime_feature_11,regime_feature_12,regime_feature_13,regime_feature_14,regime_feature_15,regime_feature_16
0,2025-01-30 09:43:23.258000+00:00,690.1,20.0,0.000256,-0.029282,-0.128782,0.105374,-0.067034,-0.61737,-0.25096,0.269042,-0.130306,-0.126755,0.184723,-0.318724,-0.179031,-0.478511,0.071643,-0.030482,0.136815
1,2025-01-30 09:43:23.258000+00:00,690.16,21.0,0.000256,-0.023043,-0.087199,0.213792,-0.123163,-0.530858,-0.438696,0.227546,-0.097592,-0.158889,0.109344,-0.427257,-0.287524,-0.513854,0.012487,0.044955,0.144784
2,2025-01-30 09:43:23.258000+00:00,690.16,79.0,0.000256,-0.034108,-0.027098,0.249391,-0.106839,-0.392979,-0.512386,0.154055,-0.04002,-0.129154,-0.035572,-0.44796,-0.383848,-0.458906,-0.068336,0.077038,0.154237
3,2025-01-30 09:43:23.258000+00:00,690.03,59.0,0.000256,-0.06997,0.049692,0.245236,-0.03942,-0.044145,-0.409561,0.003498,0.08429,-0.098843,-0.272599,-0.282488,-0.485892,-0.163344,-0.196759,0.075929,0.273296
4,2025-01-30 09:43:23.258000+00:00,690.16,100.0,0.000256,-0.158085,0.052753,0.341358,0.03607,0.186234,-0.219252,-0.095118,0.134497,-0.157812,-0.380143,-0.066769,-0.453377,0.009172,-0.205702,-0.012834,0.543021
