In [1]:
import sys
import os

# Put the parent of the current working directory on the import path
# (presumably where the project packages imported by later cells live — TODO confirm).
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guard against duplicates: without the check, every re-run of this cell
# would append the same entry to sys.path again.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [2]:
import numpy as np
import pandas as pd
import os
from helpers import load_parquet, standardize
from thermal_optimal_path.lattice import partition_function
from thermal_optimal_path.statistics import average_path
from datetime import datetime, timedelta

# Define the function to compute the weighted average

def compute_weighted_avg(year, month, day, crypto1, crypto2, previous_day_tail=None, temperature=0.1, minute=40,
                         base_path='/home/elantonino/Big data project/data'):
    """Compute a rolling weighted average of the average path for one day of data.

    For every window of ``minute`` consecutive samples, the standardized
    'Open' series of both assets are passed to ``partition_function`` and
    ``average_path``; every other element of the resulting path is combined
    with exponentially increasing, normalized weights (the last element of
    the window gets the largest weight).

    Parameters
    ----------
    year, month, day
        Date components interpolated verbatim into the file name
        ``<PAIR>-1s-<year>-<month>-<day>.parquet`` (callers must zero-pad).
    crypto1, crypto2 : str
        Asset symbols. Both the base asset (``"BTC"``) and the full pair
        (``"BTCUSDT"``) are accepted; ``"USDT"`` is appended only when it is
        not already present, so the suffix is never doubled in the path.
    previous_day_tail : dict or None
        Mapping with keys ``'crypto1'`` and ``'crypto2'`` holding the last
        samples of the previous day (as returned by this function). When
        given, they are prepended to the current day's series.
    temperature : float
        Forwarded unchanged to ``partition_function``.
    minute : int
        Window length in samples (presumably seconds, since the files read
        are the ``-1s-`` ones, despite the parameter name — TODO confirm).
    base_path : str
        Directory containing one sub-folder per pair.

    Returns
    -------
    tuple
        ``(values, tail)`` where ``values`` is a list with one weighted
        average per window (``len(series) - minute`` entries) and ``tail`` is
        a dict with the last ``minute`` standardized samples of each series,
        to be passed as ``previous_day_tail`` for the next day.
    """
    def _day_file(symbol):
        # Normalize "BTC" and "BTCUSDT" to the same pair name before building the path.
        pair = symbol if symbol.endswith('USDT') else f'{symbol}USDT'
        return f'{base_path}/{pair}/{pair}-1s-{year}-{month}-{day}.parquet'

    crypto1_data = load_parquet(_day_file(crypto1))
    crypto2_data = load_parquet(_day_file(crypto2))

    crypto1_prices_std = standardize(crypto1_data['Open'])
    crypto2_prices_std = standardize(crypto2_data['Open'])

    if previous_day_tail is not None:
        # Prepend the previous day's tail so the first windows of this day
        # span the day boundary.
        # NOTE(review): the tail was standardized with the previous day's data,
        # the rest with today's — confirm the two scales are meant to be mixed.
        crypto1_prices_std = np.concatenate((previous_day_tail['crypto1'], crypto1_prices_std))
        crypto2_prices_std = np.concatenate((previous_day_tail['crypto2'], crypto2_prices_std))

    # NOTE(review): only the first series' length drives the loop; both series
    # are assumed to have the same number of samples — verify against the data.
    total = len(crypto1_prices_std)

    # Exponentially increasing weights normalized to sum to 1.
    weights = np.exp(np.linspace(0, 2, minute))
    weights /= weights.sum()

    df_avg = []
    for start in range(total - minute):
        stop = start + minute
        # Distinct names: the originals reused `crypto1`/`crypto2` and shadowed
        # the symbol parameters.
        window1 = crypto1_prices_std[start:stop]
        window2 = crypto2_prices_std[start:stop]

        g = partition_function(window1, window2, temperature)
        # Keep every other element of the average path so that it lines up with
        # the `minute` weights (assumes the path has 2 * minute - 1 or
        # 2 * minute entries — TODO confirm against thermal_optimal_path).
        avg_path = average_path(g)[::2]
        df_avg.append(np.dot(avg_path, weights))

    return df_avg, {'crypto1': crypto1_prices_std[-minute:], 'crypto2': crypto2_prices_std[-minute:]}

def process_all_days(year, start_date=None, end_date=None,
                     output_folder='/home/elantonino/Big data project/lead_lags'):
    """Run ``compute_weighted_avg`` for BTC/ETH on every day of a date range and save one parquet file per day.

    Parameters
    ----------
    year : int
        Year used to build the default date range.
    start_date, end_date : datetime or None
        Inclusive bounds of the range. They default to January 17 and
        December 31 of ``year`` (the January 17 start is kept from the
        original notebook; presumably the first day with data — TODO confirm).
    output_folder : str
        Directory receiving ``weighted_avg_<year>_<month>_<day>.parquet``;
        created if it does not exist.

    Notes
    -----
    The tail returned for each day is passed to the next day's call, so days
    must be processed in chronological order. Nothing is written for a day
    whose result list is empty.
    """
    if start_date is None:
        start_date = datetime(year, 1, 17)
    if end_date is None:
        end_date = datetime(year, 12, 31)

    os.makedirs(output_folder, exist_ok=True)

    previous_day_tail = None
    current_date = start_date

    while current_date <= end_date:
        # Use the date's own year so that a custom range may cross a year boundary.
        current_year = current_date.year
        day_str = f"{current_date.day:02d}"
        month_str = f"{current_date.month:02d}"

        print(f"Processing {current_year}-{month_str}-{day_str} for BTCUSDT and ETHUSDT")
        # Pass the base assets: compute_weighted_avg appends "USDT" itself when
        # building the file path, so passing "BTCUSDT" would point at
        # ".../BTCUSDTUSDT/BTCUSDTUSDT-1s-...".
        df_avg, current_day_tail = compute_weighted_avg(
            current_year, month_str, day_str, 'BTC', 'ETH', previous_day_tail
        )

        if df_avg:
            # Every row of a day carries the same (midnight) timestamp; the
            # row position is the only intra-day ordering information.
            daily_df = pd.DataFrame({
                'Date': [current_date] * len(df_avg),
                'WeightedAvg': df_avg
            })
            output_path = f'{output_folder}/weighted_avg_{current_year}_{month_str}_{day_str}.parquet'
            daily_df.to_parquet(output_path, index=False)
            print(f"Results saved to {output_path}")

        previous_day_tail = current_day_tail
        current_date += timedelta(days=1)


In [3]:
# Run the pipeline for 2024 (by default from January 17 to December 31).
# NOTE(review): the recorded output of this cell is an OSError on
# '/home/elantonino' — presumably raised while creating the hard-coded
# output folder on a machine where that path cannot be created.
process_all_days(2024)

OSError: [Errno 45] Operation not supported: '/home/elantonino'

In [18]:
# Load the saved result of a single day (2024-01-18) for inspection.
file_path = '/home/elantonino/Big data project/lead_lags/weighted_avg_2024_01_18.parquet'
data = pd.read_parquet(file_path)

In [19]:
# Show the loaded frame as the cell output (columns: Date, WeightedAvg).
data

Unnamed: 0,Date,WeightedAvg
0,2024-01-18,-1.061607
1,2024-01-18,-0.798687
2,2024-01-18,-0.406758
3,2024-01-18,0.137148
4,2024-01-18,0.511313
...,...,...
86395,2024-01-18,-1.698173
86396,2024-01-18,-1.685875
86397,2024-01-18,-1.664874
86398,2024-01-18,-1.634514


In [9]:
# Number of seconds in a day (24 h * 3600 s) — presumably a sanity check
# against the row count of the per-day frame built from 1-second data.
24*3600

86400