## Analytics

This file implements statistical analysis and trading indicators on OHLC data.

In [1]:
# Imports

import matplotlib.pyplot as plt
import scipy.stats as stats
import mplfinance as mpf
import pandas as pd
import numpy as np

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
class Analytics:
    """
    Stateless collection of price indicators computed on one DataFrame column.

    Every method validates that 'column' exists and raises ValueError otherwise.
    """

    @staticmethod
    def moving_average(df: pd.DataFrame, column: str, window: int) -> pd.Series:
        """
        Computing simple moving average (SMA) over the 'column' price.

        Args:
            df: Input data.
            column: Name of the column to average.
            window: Number of consecutive rows in each average.

        Returns:
            Series aligned with df; the first ``window - 1`` entries are NaN
            because pandas requires a full window by default.

        Raises:
            ValueError: If 'column' is not a column of df.
        """
        if column not in df.columns:
            raise ValueError(f"The column {column} does not appear in {list(df.columns)}")

        # Using the pandas built-in moving average function
        return df[column].rolling(window=window).mean()

    @staticmethod
    def exponential_ma(df: pd.DataFrame, column: str, window: int) -> pd.Series:
        """
        Computing exponential moving average (EMA) over the 'column' price.

        Args:
            df: Input data.
            column: Name of the column to smooth.
            window: EMA span; the smoothing factor is ``2 / (window + 1)``.

        Returns:
            Series aligned with df, seeded with the first observation.

        Raises:
            ValueError: If 'column' is not a column of df.
        """
        if column not in df.columns:
            raise ValueError(f"The column {column} does not appear in {list(df.columns)}")

        # Using the pandas built-in exponential moving average function.
        # adjust=False selects the recursive form
        # ema[t] = alpha * x[t] + (1 - alpha) * ema[t - 1].
        return df[column].ewm(span=window, adjust=False).mean()

    @staticmethod
    def bollinger_bands(df: pd.DataFrame, column: str, window: int, factor: float = 2):
        """
        Computing Bollinger Bands (middle, upper, lower) over the 'column' price.

        Args:
            df: Input data.
            column: Name of the column the bands are built on.
            window: Number of rows in the rolling mean and rolling deviation.
            factor: Number of standard deviations between the middle band
                and each outer band.

        Returns:
            Tuple ``(middle, upper, lower)`` of Series aligned with df.

        Raises:
            ValueError: If 'column' is not a column of df.
        """
        if column not in df.columns:
            raise ValueError(f"DataFrame must contain a {column} column")

        # One rolling object feeds both statistics so they share the same window.
        rolling = df[column].rolling(window=window)
        ma = rolling.mean()
        # pandas' default here is the sample standard deviation (ddof=1).
        std = rolling.std()

        # Moving average, upper band, lower band
        return ma, ma + factor * std, ma - factor * std

In [3]:
def correlation_matrix(data: dict, column: str) -> pd.DataFrame:
    """
    Computing the correlations of products based on a certain column

    Args:
        data: Mapping from symbol to a DataFrame that contains 'column'.
        column: Name of the column whose returns are correlated.

    Returns:
        Square DataFrame of pairwise Pearson correlations between the
        row-to-row percentage changes of each retained symbol.

    Raises:
        ValueError: If no symbol has a NaN-free 'column' (this includes an
            empty 'data' mapping).
    """
    # Collect the requested column of every symbol, skipping any symbol that
    # has at least one missing value in it.
    values = {}

    for symbol, df in data.items():
        series = df[column]

        if series.isna().any():
            continue

        values[symbol] = series

    if not values:
        raise ValueError(f"No symbol has a complete {column} column to correlate")

    # Align all symbols on a common index; rows missing for any symbol are dropped.
    prices = pd.concat(values, axis=1).dropna()

    # Compute returns (the first row has no predecessor, so it is dropped)
    returns = prices.pct_change().dropna()

    # Correlation matrix
    corr = returns.corr()

    return corr

In [4]:
def compute_stats(data: dict[pd.DataFrame]):
    """
    Computes basic statistics on the passed dict containing OHLC data
    """
    stats = {}
    
    for symbol, df in data.items():
        returns = df["CLOSE"].pct_change()  # Daily returns
        
        stats[symbol] = {
            "CLOSE_MEAN": ...,
            "CLOSE_STD": ...,
            "CLOSE_MIN": ...,
            "CLOSE_MAX": ...,
            "VOLUME_MEAN": ...,
            "RETURN_MEAN": ...,
            "RETURN_STD": ...
        }
        
    return pd.DataFrame(stats)

In [5]:
def plot_return_stats(stats_df: pd.DataFrame):
    """
    Draws one bar chart per return metric (RETURN_MEAN, then RETURN_STD),
    stacked vertically, with the bars of each chart sorted from largest to
    smallest value.

    'stats_df' is indexed by metric name; each of its columns becomes one bar.
    """
    titles = {"RETURN_MEAN": "Average mean return", "RETURN_STD": "Return standard deviation"}
    palette = ('purple', 'plum')

    _, panels = plt.subplots(len(titles), 1, figsize=(10, 6))

    for panel, row_name, bar_color in zip(panels, titles, palette):
        # Highest value first, so the bars read left to right in descending order.
        ranked = stats_df.loc[row_name].sort_values(ascending=False)
        ranked.plot(kind="bar", ax=panel, color=bar_color)

        panel.set_title(titles[row_name])
        panel.set_ylabel("Value")
        panel.grid(True, linestyle="--", alpha=0.6)
        panel.set_xticklabels(ranked.index, rotation=60, ha="center")

    plt.tight_layout()
    plt.show()

In [6]:
def distribution_analysis(df: pd.DataFrame):
    """
    Analyze the distribution of daily returns for a given symbol.

    Includes histogram, skewness, kurtosis, QQ-plots, and normality test.

    Args:
        df: Data for one symbol; returns are computed from its "CLOSE" column.

    Returns:
        Tuple ``(skewness, kurtosis, p_value)`` where kurtosis is the excess
        (Fisher) kurtosis and p_value comes from the Shapiro-Wilk test.

    Raises:
        KeyError: If df has no "CLOSE" column.
        ValueError: If fewer than three returns are available, which is too
            few for the Shapiro-Wilk test.
    """
    # Close price returns (the first row has no predecessor and yields NaN)
    returns = df["CLOSE"].pct_change().dropna()

    if len(returns) < 3:
        raise ValueError("At least 3 returns are required for the distribution analysis")

    # Compute skewness and kurtosis
    skewness = stats.skew(returns)
    kurtosis = stats.kurtosis(returns)

    # Normality test (Shapiro-Wilk); only the p-value is reported to the caller
    _, p_value = stats.shapiro(returns)

    # Compute returns mean and std
    mu, sigma = returns.mean(), returns.std()

    # Create figure with GridSpec for one column (hist) and one column (two QQ plots)
    fig = plt.figure(figsize=(14, 6))
    gs = fig.add_gridspec(2, 2, width_ratios=[1, 1], height_ratios=[1, 1])

    # Histogram on the left (spanning both rows)
    ax_hist = fig.add_subplot(gs[:, 0])
    n, bins, _ = ax_hist.hist(returns, bins=50, density=True, color="lightblue", edgecolor="black", alpha=0.7)
    x = np.linspace(bins[0], bins[-1], 100)
    ax_hist.plot(x, stats.norm.pdf(x, mu, sigma), 'r--', linewidth=2, label='Theoretical normal PDF')
    ax_hist.set_title("Daily Returns Histogram")
    ax_hist.set_xlabel("Daily Return")
    ax_hist.set_ylabel("Density")
    ax_hist.legend()

    # QQ-plot of actual data (top-right)
    ax_qq1 = fig.add_subplot(gs[0, 1])
    res = stats.probplot(returns, dist="norm")
    ax_qq1.plot(res[0][0], res[0][1], 'o', color='blue', label='Returns')
    ax_qq1.plot(res[0][0], res[1][0]*res[0][0] + res[1][1], 'r--', linewidth=2, label='Fit')
    ax_qq1.set_title("QQ-Plot: Data")
    ax_qq1.legend()

    # QQ-plot of theoretical normal (bottom-right): a random normal sample with
    # the same mean and std, shown as a visual reference for the plot above.
    ax_qq2 = fig.add_subplot(gs[1, 1])
    normal_sample = np.random.normal(mu, sigma, 500)
    res_norm = stats.probplot(normal_sample, dist="norm")
    ax_qq2.plot(res_norm[0][0], res_norm[0][1], 'o', color='green', label='Theoretical Normal Sample')
    ax_qq2.plot(res_norm[0][0], res_norm[1][0]*res_norm[0][0] + res_norm[1][1], 'r--', linewidth=2, label='Fit')
    ax_qq2.set_title("Normal distribution QQ-plot")
    ax_qq2.legend()

    plt.tight_layout()
    plt.show()

    return skewness, kurtosis, p_value

In [7]:
def candlesticks(symbol_df: pd.DataFrame):
    """
    Plotting candlestick chart

    Args:
        symbol_df: Data for one symbol with the columns 'Open', 'High',
            'Low', 'Close', 'Volume' and 'Timestamp'. It is not modified.

    Raises:
        ValueError: If one of the required columns is missing.
    """
    # Check that the following columns are in the dataframe columns
    for column_name in ('Open', 'High', 'Low', 'Close', 'Volume', 'Timestamp'):
        if column_name not in symbol_df.columns:
            raise ValueError(f"Dataframe must contain a column {column_name}")

    # The chart is indexed by datetime; work on a copy so the caller's index
    # is left untouched.
    chart_df = symbol_df.copy()
    chart_df.index = pd.DatetimeIndex(pd.to_datetime(chart_df['Timestamp']))

    # Plot candlestick chart
    # NOTE(review): the title below is misspelled ("CANDELSTICK"); it is kept
    # unchanged here because it is user-visible output.
    mpf.plot(chart_df,
             type='candle',
             volume=True,
             style='binance',
             title='CANDELSTICK CHART',
             ylabel='PRICE',
             ylabel_lower='VOLUME')