In [2]:
import pandas as pd
import numpy as np

filename1 = 'prices_round_1_day_-2.csv'
filename2 = 'prices_round_1_day_-1.csv'
filename3 = 'prices_round_1_day_0.csv'

## Preprocessing

PRODUCT = 'STARFRUIT'            ## Only rows for this product are kept
DAY_TIMESTAMP_OFFSET = 1000000   ## Added to 'timestamp' once per later day


def load_product_prices(filename, day_index=0, product=PRODUCT):
    """Read one price file and keep only the rows for a single product.

    :param filename: Path of a ';'-separated CSV file with at least the
        'product' and 'timestamp' columns.
    :param day_index: How many days after the first file this file is; the
        'timestamp' column is shifted by ``day_index * DAY_TIMESTAMP_OFFSET``.
        NOTE(review): presumably every file's timestamps stay below
        DAY_TIMESTAMP_OFFSET so shifted days do not overlap - confirm.
    :param product: Value of the 'product' column to keep.
    :return: A new DataFrame (independent copy) with the filtered rows.
    """
    prices = pd.read_csv(filename, sep=';')  ## The used separator is ;
    # .copy() makes the filtered frame independent of `prices`, so the
    # timestamp assignment below cannot trigger SettingWithCopyWarning.
    product_prices = prices[prices['product'] == product].copy()
    if day_index:
        # Vectorized shift instead of a per-element .apply(lambda ...)
        product_prices['timestamp'] = (
            product_prices['timestamp'] + day_index * DAY_TIMESTAMP_OFFSET
        )
    return product_prices


file1 = load_product_prices(filename1, day_index=0)
file2 = load_product_prices(filename2, day_index=1)
file3 = load_product_prices(filename3, day_index=2)

## Three days merged into 1 frame. The original per-file row labels are kept
## (no ignore_index), so index labels may repeat across days.
data = pd.concat([file1, file2, file3])

# Add order-book indicators to `data`; this cell adds 'sentiment' and its
# rolling average.

# Distance between the mid price and the best bid / best ask, each weighted
# by the volume quoted at that level.
bid_pressure = (data['mid_price'] - data['bid_price_1']) * data['bid_volume_1']
ask_pressure = (data['ask_price_1'] - data['mid_price']) * data['ask_volume_1']

# 'sentiment' is the bid side's share of the combined weighted distance
data['sentiment'] = bid_pressure / (bid_pressure + ask_pressure)

# 'average_sentiment' is the mean over the current row and up to two previous
# rows (min_periods=1 means the first rows average over fewer values)
data['average_sentiment'] = data['sentiment'].rolling(window=3, min_periods=1).mean()

In [4]:
data['average_sentiment'].head()
# This lines up with the R calculation

1    0.50000
3    0.35000
4    0.24375
7    0.24375
9    0.34375
Name: average_sentiment, dtype: float64

In [8]:
def calculate_rsi(prices, period=5):
    """Compute the Relative Strength Index (RSI) of a price sequence.

    :param prices: Prices in chronological order. May be a pandas Series or
        any list-like (list, tuple, NumPy array); a Series keeps its index
        and name in the result.
    :param period: Smoothing period. The averages use an exponentially
        weighted mean with ``com=period - 1`` (pandas' default ``adjust=True``
        weighting) and need ``period`` price changes before producing a value,
        so the first ``period`` entries of the result are NaN.
    :return: Series of RSI values aligned with ``prices``. Entries where both
        the average gain and the average loss are zero are NaN (0 / 0).
    """
    # Accept plain list-likes as well as Series so callers can pass either
    prices = pd.Series(prices)

    # Price change from one observation to the next (first entry is NaN)
    delta = prices.diff()

    # Separate gains and losses, both expressed as non-negative magnitudes
    gain = delta.clip(lower=0)
    loss = -1 * delta.clip(upper=0)

    # Calculate average gains and losses using EMA
    avg_gain = gain.ewm(com=period - 1, min_periods=period).mean()
    avg_loss = loss.ewm(com=period - 1, min_periods=period).mean()

    # Relative strength; a zero average loss with a positive gain gives inf,
    # which the formula below maps to an RSI of 100
    rs = avg_gain / avg_loss

    # Calculate RSI
    rsi = 100 - (100 / (1 + rs))

    return rsi

# Store the RSI of the merged mid-price series as a new 'RSI' column of `data`
data['RSI'] = calculate_rsi(prices=data['mid_price'])

# Print the first 20 RSI values as a quick check of the new column
print(data['RSI'].iloc[:20])


1           NaN
3           NaN
4           NaN
7           NaN
9           NaN
11    66.827578
12    45.047037
15    49.187276
17    65.454306
19    65.454306
20    38.495750
22    42.709357
25    47.228522
26    62.154894
28    38.397257
30    48.867528
33    48.867528
34    44.893423
36    37.308273
38    66.016972
Name: RSI, dtype: float64


In [19]:
def calculate_macd(close_prices, short_window=12, long_window=26, signal_window=9,
                   include_signal=False):
    """
    Calculate the MACD indicator and, optionally, its Signal Line.

    :param close_prices: Prices in chronological order; a pandas Series or any
        list-like. A Series keeps its index in the result.
    :param short_window: Span of the short-term EMA (default 12).
    :param long_window: Span of the long-term EMA (default 26).
    :param signal_window: Span of the EMA applied to the MACD to obtain the
        signal line (default 9). Only used when ``include_signal`` is True.
    :param include_signal: When True, also return a 'Signal Line' column.
        Defaults to False, so by default only 'MACD' is returned.
    :return: DataFrame indexed like ``close_prices`` with a 'MACD' column and,
        if requested, a 'Signal Line' column.
    """
    # Accept plain list-likes as well as Series
    close_prices = pd.Series(close_prices)

    # adjust=False gives the recursive EMA form, seeded with the first price
    short_ema = close_prices.ewm(span=short_window, adjust=False).mean()
    long_ema = close_prices.ewm(span=long_window, adjust=False).mean()

    # Moving Average Convergence/Divergence: short EMA minus long EMA
    macd = short_ema - long_ema

    # Compile the MACD data as a DataFrame
    macd_df = pd.DataFrame(index=close_prices.index)
    macd_df['MACD'] = macd

    # The signal line is only computed on request, so the default call does
    # no unused work
    if include_signal:
        macd_df['Signal Line'] = macd.ewm(span=signal_window, adjust=False).mean()

    return macd_df

# Build the MACD frame from the 'mid_price' column of `data`, using the
# default window lengths of calculate_macd
macd_data = calculate_macd(close_prices=data['mid_price'])

In [25]:
def calculate_rsi(prices, period=5):
    """Return the RSI series for ``prices`` (a list-like or pandas Series).

    The input is wrapped in a pandas Series, split into upward and downward
    moves, and each side is smoothed with an exponentially weighted mean
    (``com=period - 1``, at least ``period`` observations required). The
    result is ``100 - 100 / (1 + avg_gain / avg_loss)``; positions without
    enough observations are NaN.
    """
    def _smoothed(moves):
        # Exponentially weighted average shared by the gain and loss sides
        return moves.ewm(com=period - 1, min_periods=period).mean()

    # Change between consecutive prices; the first entry has no predecessor
    changes = pd.Series(prices).diff()

    # Upward moves keep their size, downward moves are flipped to positive
    up_moves = changes.clip(lower=0)
    down_moves = -1 * changes.clip(upper=0)

    relative_strength = _smoothed(up_moves) / _smoothed(down_moves)

    return 100 - (100 / (1 + relative_strength))

# RSI over only the first six mid prices, printed for comparison with the full-series column
print(calculate_rsi(prices=data['mid_price'].iloc[:6]))

1           NaN
3           NaN
4           NaN
7           NaN
9           NaN
11    66.827578
Name: mid_price, dtype: float64


In [26]:
# The six mid prices that fed the RSI check above
data['mid_price'].iloc[:6]

1     5000.0
3     5002.0
4     5000.0
7     4999.5
9     4998.5
11    5002.0
Name: mid_price, dtype: float64

In [29]:
# Hand-written price list to check calculate_rsi on plain list input
calculate_rsi(
    [
        5040.5, 5040.5, 5040.5, 5040.5, 5039.5,
        5039.5, 5039.5, 5039.5, 5040.5, 5039.5,
    ]
)

0          NaN
1          NaN
2          NaN
3          NaN
4          NaN
5     0.000000
6     0.000000
7     0.000000
8    70.942111
9    37.599639
dtype: float64