In [1]:
import sys
sys.path.append("..")
import Data as dt
import Temp_lib as lib

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [2]:
# Load the WDO bars and discard the "Unnamed: 0" column (presumably a leftover
# CSV row index -- verify against the data file). The drop is chained instead of
# done with inplace=True so the object returned by load_data is never mutated
# and `wdo` is always a freshly derived frame.
wdo = dt.load_data(ticker="WDO").drop(columns=["Unnamed: 0"])

In [3]:
# data = dt.load_dataList(ticker_list=['A'])
# data = data['A'].loc['2012-01-01':'2015-01-01']
# plt.figure(figsize=(17, 4))
# plt.plot(data['close'])
# plt.show()

In [5]:
# Settings handed to the average-feature extractor: four window lengths and a
# single smoothing configuration.
params = {
    "window": [5, 10, 30, 60],
    "smoothing_method": ["ewma"],
    "window_smooth": [5],
    "lambda_smooth": [0.2],
}

# The features are computed from the close column only.
close_series = wdo['close']

average_extractor = lib.average_feature(data=close_series, n_jobs=8, params=params)
average_features = average_extractor.extract()

In [6]:
# Show the frame returned by the average-feature extractor (one column per window setting).
average_features

Unnamed: 0,average_5_ewma_5_0.2,average_10_ewma_5_0.2,average_30_ewma_5_0.2,average_60_ewma_5_0.2
4,,,,
5,,,,
6,,,,
7,,,,
8,,,,
...,...,...,...,...
131693,0.000018,0.000014,3.841721e-05,0.000058
131694,0.000012,0.000013,3.841721e-05,0.000057
131695,-0.000031,-0.000025,-9.467337e-07,0.000018
131696,-0.000020,-0.000014,6.925815e-06,0.000024


In [None]:
# Series that will be labelled.
close_series = wdo['close']

# Labeller configuration; every key carries exactly one candidate value.
tripleBarrier_params = {
    "upper_barrier": [0.8],
    "lower_barrier": [1.5],
    "vertical_barrier": [20],
    "vol_window": [20],
    "smoothing_method": [None],
    "window_smooth": [5],
    "lambda_smooth": [0.2],
}

tripleBarrier_labeller = lib.tripleBarrier_labeller(
    data=close_series,
    params=tripleBarrier_params,
    n_jobs=1,
)
labels_df = tripleBarrier_labeller.extract()

# Attach the labels of the first result column ('set_0') to the price frame.
wdo.loc[:, 'labels'] = labels_df['set_0']
# lib.plot_price_with_labels(price_series=close_series, label_series=wdo['labels'])

In [None]:
def mpNumCoEvents(closeIdx,t1,molecule):
    """
    Return the maximum number of concurrent events over the bars spanned by `molecule`.

    Parameters
    ----------
    closeIdx : pd.Index
        Bar index of the price series. It is searched with ``searchsorted``,
        so it must be sorted.
    t1 : pd.Series
        One entry per event: the index is the bar where the event starts, the
        value is the bar where it ends. NaN ends are treated as ending on the
        last bar of `closeIdx`.
    molecule : sequence
        Start labels (present in ``t1.index``) of the events of interest.
        molecule[0] is the first event considered, molecule[-1] the last.

    Returns
    -------
    scalar
        The largest per-bar count of overlapping events between molecule[0]
        and the latest end among the molecule's events.

    Any event that starts before t1[molecule].max() impacts the count.
    """
    #1) find events that span the period [molecule[0],molecule[-1]]
    t1 = t1.fillna(closeIdx[-1])  # unclosed events still must impact other weights
    t1 = t1[t1 >= molecule[0]]    # events that end at or after molecule[0]
    last_end = t1.loc[molecule].max()  # latest end among the molecule's events
    t1 = t1.loc[:last_end]        # events that start at or before last_end

    #2) count events spanning a bar
    iloc = closeIdx.searchsorted(np.array([t1.index[0], t1.max()]))
    count = pd.Series(0, index=closeIdx[iloc[0]:iloc[1] + 1])

    # Iterating a Series directly yields only its values; .items() gives the
    # (start, end) pairs that are needed here.
    for tIn, tOut in t1.items():
        count.loc[tIn:tOut] += 1

    # The slice bound must be a scalar label, hence last_end rather than the
    # Series t1[molecule].
    return count.loc[molecule[0]:last_end].max()

In [None]:
# Build one row per labelled bar with the span [start_event, end_event] of its event.
# The labels were attached to `wdo` in the triple-barrier cell; no `data`
# variable is defined anywhere in the notebook, so the frame is read from `wdo`.
labels_series = wdo['labels'].copy()
close_series = wdo['close'].copy()
window = 20
upper_barrier = 0.8
vertical_barrier = 20
#*________________________

# Drop NaN values from labels_series
labels = labels_series.dropna().copy()
close = close_series.loc[labels.index].copy()
# Rolling std of returns scaled by sqrt(window), named 'vol' for the concat below.
vol = (close_series.pct_change().rolling(window).std() * np.sqrt(window)).rename('vol')

# After reset_index the row labels are 0..n-1, so a label is also a position.
auxiliary = pd.concat([labels, close, vol], axis=1).dropna().reset_index()

label_values = auxiliary['labels'].to_numpy()
close_values = auxiliary['close'].to_numpy()
vol_values = auxiliary['vol'].to_numpy()

end_events = []
for idx, (label, current_close, current_vol) in enumerate(zip(label_values, close_values, vol_values)):
    if label == 1 or label == -1:
        # NOTE(review): the -1 case also uses `upper_barrier`, while the labeller
        # was configured with a separate lower_barrier -- confirm this is intended.
        barrier = current_vol * upper_barrier
        later_closes = close_values[idx + 1:]
        if label == 1:
            hit_mask = later_closes >= current_close * (1 + barrier)
        else:
            hit_mask = later_closes <= current_close * (1 - barrier)
        hit_positions = np.flatnonzero(hit_mask)
        # First later row that reaches the target; NaN when it is never reached.
        barrier_hit_idx = idx + 1 + hit_positions[0] if hit_positions.size else np.nan
    else:
        barrier_hit_idx = vertical_barrier + idx
    end_events.append(barrier_hit_idx)

# Assign both columns once, as floats, instead of growing them cell by cell.
auxiliary['start_event'] = auxiliary.index.astype(float)
auxiliary['end_event'] = np.asarray(end_events, dtype=float)

auxiliary = auxiliary.drop(columns=['vol'])

auxiliary["concurrent_events"] = 0  # Initialize column

def count_concurrent_events(row, df):
    """
    Count the events in `df` that share `row`'s label and are still open when
    `row`'s event starts.

    Parameters
    ----------
    row : pd.Series
        One event; must provide 'labels' and 'start_event'.
    df : pd.DataFrame
        All events, with 'labels', 'start_event' and 'end_event' columns.

    Returns
    -------
    int
        Number of rows with the same label that start strictly before and end
        strictly after row['start_event']. Rows whose 'end_event' is NaN never
        match because comparisons with NaN are False.
    """
    start_idx = row['start_event']

    open_at_start = (
        (df['labels'] == row['labels'])
        & (df['start_event'] < start_idx)
        & (df['end_event'] > start_idx)
    )

    # A count is never negative, so no clamping is required.
    return int(open_at_start.sum())

# Evaluate every event row against the whole frame; the frame is forwarded as
# the extra positional argument of count_concurrent_events.
auxiliary['concurrent_events'] = auxiliary.apply(
    count_concurrent_events,
    axis=1,
    args=(auxiliary,),
)

In [None]:
# Show the event table built above, including the concurrent_events column.
auxiliary

In [None]:
def label_uniqueness(labels_series: pd.Series, close_series: pd.Series):
    """
    Placeholder for the label-uniqueness computation.

    Parameters
    ----------
    labels_series : pd.Series
        Labels per bar.
    close_series : pd.Series
        Close prices per bar.

    Raises
    ------
    NotImplementedError
        Always; the function has not been written yet. The explicit body keeps
        the cell valid Python (a `def` with no body is a syntax error).
    """
    # TODO: implement.
    raise NotImplementedError("label_uniqueness is not implemented yet")

In [None]:
# Toy example: ten bars (0..9) and five overlapping events.
closeIdx = pd.Index(list(range(10)))

# Index = bar where the event starts, value = bar where it ends:
# 1->4, 2->5, 3->6, 4->7, 5->8
t1 = pd.Series([4, 5, 6, 7, 8], index=[1, 2, 3, 4, 5])

# Events (identified by their start bar) for which the count is requested.
molecule = [2, 5]

result = mpNumCoEvents(closeIdx, t1, molecule)

print("Maximum number of concurrent events:", result)