In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [11]:
def bollinger(n):
    """Add a ``bollinger`` column to the module-level ``df`` in place.

    The column holds the rolling z-score of ``price_close``: the distance of
    each close from its ``n``-row rolling mean, expressed in units of the
    ``n``-row rolling standard deviation. Rows that do not yet have a full
    window are NaN (pandas' default ``min_periods`` for a fixed window).

    Args:
        n: Window size forwarded to ``Series.rolling``.

    Returns:
        None. ``df`` is mutated; only the ``bollinger`` column is added.
    """
    close = df['price_close']
    window = close.rolling(n)
    # The mean and standard deviation stay local Series rather than scratch
    # columns, so existing 'SMA' / 'stdev' columns in df are never
    # overwritten or dropped as a side effect.
    df['bollinger'] = (close - window.mean()) / window.std()

def EMA(n1, n2):
    """Add an ``EMAcross`` column to the module-level ``df`` in place.

    ``EMAcross`` is the exponential moving average of ``price_close`` with
    span ``n1`` minus the one with span ``n2``. Both averages use the
    recursive form (``adjust=False``).

    Args:
        n1: Span of the first EMA (the minuend).
        n2: Span of the second EMA (the subtrahend).

    Returns:
        None. ``df`` is mutated; only the ``EMAcross`` column is added.
    """
    close = df['price_close']
    ema_first = close.ewm(span=n1, adjust=False).mean()
    ema_second = close.ewm(span=n2, adjust=False).mean()
    # Both averages stay local so that existing 'EMA1' / 'EMA2' columns in
    # df are not overwritten and dropped as a side effect.
    df['EMAcross'] = ema_first - ema_second

def RSI(n):
    """Add an ``RSI`` column to the module-level ``df`` in place.

    The value is ``avg_up / (avg_down + avg_up)``, where ``avg_up`` and
    ``avg_down`` are the ``n``-row simple rolling means of the positive and
    negative close-to-close moves of ``price_close``. The result is a ratio
    (it is not multiplied by 100). It is NaN until a full window of moves
    is available and wherever both averages are zero.

    Args:
        n: Window size forwarded to ``Series.rolling``.

    Returns:
        None. ``df`` is mutated; only the ``RSI`` column is added.
    """
    # diff() is close - previous close; the first row is NaN.
    change = df['price_close'].diff()
    # Intermediates are local Series, so existing 'price_close_prev', 'U'
    # or 'D' columns in df are left untouched, and each rolling mean is
    # computed exactly once.
    avg_up = change.clip(lower=0).rolling(n).mean()
    avg_down = (-change).clip(lower=0).rolling(n).mean()
    df['RSI'] = avg_up / (avg_down + avg_up)

def time():
    """Add calendar feature columns to the module-level ``df`` in place.

    Reads the ``hour``, ``weekday`` and ``month`` attributes of ``df.index``
    and stores them as the columns ``Hour``, ``Day of Week`` and ``Month``,
    in that order. ``Day of Week`` is shifted to run from 1 (Monday) to
    7 (Sunday).
    """
    stamps = df.index
    calendar_parts = {
        'Hour': stamps.hour,
        'Day of Week': stamps.weekday + 1,  # Monday=1, Sunday=7
        'Month': stamps.month,
    }
    for label, values in calendar_parts.items():
        df[label] = values

from datetime import timedelta

def fib_ratio(window, df):
    """Compute Fibonacci retracement levels over a trailing window of closes.

    For every index label from position ``window`` onward, the highest and
    lowest ``price_close`` within the preceding ``window`` days (both
    endpoints inclusive) are taken as the swing high and swing low, and each
    level is ``swing_high - ratio * (swing_high - swing_low)``.

    NOTE(review): ``window`` is used in two different units: as a row count
    (the first ``window`` rows are skipped) and as a number of days (the
    look-back span). These only agree for daily data; confirm the intent.

    Args:
        window: Number of leading rows to skip and number of days to look
            back.
        df: Frame with a ``price_close`` column. The index labels must
            support subtracting a ``timedelta`` (i.e. be datetime-like).

    Returns:
        A new frame with the columns ``fib_23``, ``fib_38``, ``fib_50``,
        ``fib_61``, ``fib_78`` (the ratio times 100, truncated to an int)
        followed by the columns of ``df``, left-joined on the index. Rows
        whose window lookup raised are reported with ``print`` and omitted.
    """
    fib_ratios = [0.236, 0.382, 0.5, 0.618, 0.786]
    fib_data = []

    # The loop variable is deliberately not called `time`, so it does not
    # shadow the module-level time() helper.
    for stamp in df.index[window:]:
        try:
            # Slice the trailing window once and reuse it for both extremes;
            # label-based .loc slicing includes both endpoints.
            recent = df.loc[stamp - timedelta(days=window):stamp, 'price_close']
            swing_high = recent.max()
            swing_low = recent.min()
        except Exception as e:
            # Best effort: report the row and carry on with the rest.
            print(f"Error processing {stamp}: {e}")
            continue

        price_diff = swing_high - swing_low
        levels = [swing_high - ratio * price_diff for ratio in fib_ratios]
        fib_data.append([stamp] + levels)

    columns = ['time_period_end'] + [f'fib_{int(ratio * 100)}' for ratio in fib_ratios]
    fib_df = pd.DataFrame(fib_data, columns=columns)
    fib_df.set_index('time_period_end', inplace=True)
    # Left join keeps exactly the rows that received levels.
    fib_df = fib_df.merge(df, how='left', left_index=True, right_index=True)

    return fib_df

In [12]:
# Build the technical-indicator feature file for every coin: read the
# segmented parquet, add the indicator columns, and write the result to the
# *_tech directory.
coins = {'arb', 'avax', 'btc', 'eth', 'matic', 'sol', 'tron'}
n = 24  # window shared by the Bollinger, RSI and Fibonacci steps
for coin in coins:
    filepath = f'../Data/data_segmented/{coin}_data.parquet'
    df = pd.read_parquet(filepath)
    df.set_index('index', inplace=True)
    # Discard the first row. The explicit copy gives the helpers below, which
    # add columns to the global `df` in place, an independent frame instead
    # of a slice of the original (avoids pandas' SettingWithCopyWarning).
    df = df.iloc[1:].copy()
    bollinger(n)
    EMA(20, 50)
    RSI(n)
    time()
    df = fib_ratio(n, df)
    outpath = f'../Data/data_segmented_tech/{coin}_data.parquet'
    # NOTE(review): index=False means the index returned by fib_ratio (the
    # row timestamps) is not written to the output file - confirm this is
    # intended.
    df.to_parquet(outpath, engine="pyarrow", index=False)