# Base stats

> Statistical functions for portfolio analysis

In [None]:
#| default_exp stats

In [None]:
#| hide
from nbdev.showdoc import *
from warnings import warn
import pandas as pd
import numpy as np
from math import ceil, sqrt
from scipy.stats import (
    norm as _norm, linregress as _linregress
)

#from . import utils as _utils

In [None]:
# Five sample daily returns (a mix of gains and losses) used as mock input
data = np.array(
    [0.03, 0.01, 0.05, -0.01, -0.03],
)
returns = pd.Series(data=data)

In [None]:
#| export
def compsum(returns):
    """Return the running compounded return after each entry of a daily-return series.

    Each return is turned into a growth factor (1 + r), the factors are
    multiplied cumulatively, and 1 is subtracted again so the result is
    expressed as a return rather than a growth factor.
    """
    growth_factors = returns.add(1)
    cumulative_growth = growth_factors.cumprod()
    return cumulative_growth - 1

In [None]:
compsum(returns)

0    0.030000
1    0.040300
2    0.092315
3    0.081392
4    0.048950
dtype: float64

In [None]:
#| export
def comp(returns):
    """Return the total compounded return over a whole series of daily returns.

    Each return is turned into a growth factor (1 + r); the product of all
    factors minus 1 is the compounded return for the full series.
    """
    growth_factors = returns.add(1)
    total_growth = growth_factors.prod()
    return total_growth - 1

In [None]:
comp(returns)

0.048950094500000096

In [None]:
#| hide
# distribution function: specifics still to be decided -- should this be using standard deviation?
# def distribution(returns, compounded=True, prepare_returns=True):
#     def get_outliers(data):
#         # https://datascience.stackexchange.com/a/57199
#         Q1 = data.quantile(0.25)
#         Q3 = data.quantile(0.75)
#         IQR = Q3 - Q1  # IQR is interquartile range.
#         filtered = (data >= Q1 - 1.5 * IQR) & (data <= Q3 + 1.5 * IQR)
#         return {
#             "values": data.loc[filtered].tolist(),
#             "outliers": data.loc[~filtered].tolist(),
#         }

#     if isinstance(returns, _pd.DataFrame):
#         warn("Pandas DataFrame was passed (Series expected). "
#              "Only first column will be used.")
#         returns = returns.copy()
#         returns.columns = map(str.lower, returns.columns)
#         if len(returns.columns) > 1 and 'close' in returns.columns:
#             returns = returns['close']
#         else:
#             returns = returns[returns.columns[0]]

#     apply_fnc = comp if compounded else _np.sum
#     daily = returns.dropna()

#     if prepare_returns:
#         daily = _utils._prepare_returns(daily)

#     return {
#         "Daily": get_outliers(daily),
#         "Weekly": get_outliers(daily.resample('W-MON').apply(apply_fnc)),
#         "Monthly": get_outliers(daily.resample('M').apply(apply_fnc)),
#         "Quarterly": get_outliers(daily.resample('Q').apply(apply_fnc)),
#         "Yearly": get_outliers(daily.resample('A').apply(apply_fnc))
#     }

In [None]:
#| export
def expected_return(returns, aggregate=None, compounded=True,
                    prepare_returns=True):
    """
    Returns the expected geometric return for a given period
    by calculating the geometric holding period return.

    The result is ``prod(1 + r_i) ** (1 / n) - 1`` where ``n`` is
    ``len(returns)``.

    Parameters
    ----------
    returns : sequence of periodic returns (e.g. a pandas Series)
    aggregate, compounded, prepare_returns :
        Accepted for API compatibility but currently ignored; the
        preparation and aggregation steps are not wired in yet.

    Returns NaN when `returns` is empty, since the geometric mean of
    no observations is undefined.
    """
    n_periods = len(returns)
    if n_periods == 0:
        # Avoid ZeroDivisionError in the 1 / n exponent below.
        return np.nan
    # np.prod replaces the deprecated alias np.product (removed in NumPy 2.0).
    return np.prod(1 + returns) ** (1 / n_periods) - 1

The expected geometric return is:
$$ \left(\prod\limits_{i=1}^{n}(p_{i})\right)^{(1/n)} -1$$

where $n$ is the number of daily returns and $p_{i}$ is one plus the daily return, $p_{i}=\frac{v_{i}}{v_{i-1}}=1+r_{i}$; here $v_{i}$ and $r_{i}$ are the value and the return of the asset on day $i$, respectively.

In [None]:
expected_return(returns, aggregate=None, compounded=True,
                    prepare_returns=True)

0.009603773872040255

In [None]:
#| export
def geometric_mean(retruns, aggregate=None, compounded=True):
    """Alias for expected_return().

    Forwards `retruns`, `aggregate` and `compounded` unchanged;
    `prepare_returns` is left at expected_return()'s default.
    """
    result = expected_return(
        retruns,
        aggregate=aggregate,
        compounded=compounded,
    )
    return result


def ghpr(retruns, aggregate=None, compounded=True):
    """Alias for expected_return() (geometric holding period return).

    Forwards `retruns`, `aggregate` and `compounded` unchanged;
    `prepare_returns` is left at expected_return()'s default.
    """
    result = expected_return(
        retruns,
        aggregate=aggregate,
        compounded=compounded,
    )
    return result

In [None]:
#| export
def foo():
    """Placeholder that takes no arguments, does nothing and returns None."""
    return None

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()