# Utils

> Utility functions for portfolio analysis

In [None]:
#| default_exp utils

In [None]:
#| hide
import io as io
import datetime as dt
import pandas as pd
import numpy as np
import yfinance as yf
import hypersheets.stats as stats
import inspect

In [None]:
#| export
def mtd(df):
    """Restrict a dataframe to only month to date.

    Args:
        * df (Series, DataFrame): data whose index holds dates.
    Returns:
        * The rows of ``df`` whose index is on or after the first day of
          the current month (taken from ``datetime.now()``).
    """
    # `dt` is the file-level alias for the datetime module.
    month_start = dt.datetime.now().strftime('%Y-%m-01')
    return df[df.index >= month_start]

In [None]:
#| export
def qtd(df):
    """Restrict a dataframe to only quarter to date.

    Quarters start in January, April, July and October.

    Args:
        * df (Series, DataFrame): data whose index holds dates.
    Returns:
        * The rows of ``df`` whose index is on or after the first day of
          the current calendar quarter (taken from ``datetime.now()``).
    """
    now = dt.datetime.now()
    # Map months 1-3 -> 1, 4-6 -> 4, 7-9 -> 7, 10-12 -> 10.
    quarter_month = 3 * ((now.month - 1) // 3) + 1
    quarter_start = dt.date(now.year, quarter_month, 1).strftime('%Y-%m-%d')
    return df[df.index >= quarter_start]

In [None]:
#| export
def ytd(df):
    """Restrict a dataframe to only year to date.

    Args:
        * df (Series, DataFrame): data whose index holds dates.
    Returns:
        * The rows of ``df`` whose index is on or after January 1st of the
          current year (taken from ``datetime.now()``).
    """
    # `dt` is the file-level alias for the datetime module.
    year_start = dt.datetime.now().strftime('%Y-01-01')
    return df[df.index >= year_start]

In [None]:
#| hide


In [None]:
#| export
def pandas_date(df, dates):
    """Keep only the rows of a date-indexed dataframe that fall on given days.

    Args:
        * df (Series, DataFrame): data with dates as the index.
        * dates: a list of index values to keep; any non-list value is
            treated as a single date.
    Returns:
        * The rows of ``df`` whose index value is one of ``dates``.
    """
    wanted = dates if isinstance(dates, list) else [dates]
    keep = df.index.isin(wanted)
    return df[keep]

In [None]:
# Example: index a small frame by date of birth, then keep only the rows
# whose index matches one of the requested dates.
df = pd.DataFrame({
    'name': ['alice','bob','charlie','ryan'],
    'DoB': ['2005-10-15','2002-09-03','2001-01-01','1999-12-31']
})
df = df.set_index('DoB')
dates = ['2001-01-01','2005-10-15']
pandas_date(df, dates)

Unnamed: 0_level_0,name
DoB,Unnamed: 1_level_1
2005-10-15,alice
2001-01-01,charlie


In [None]:
#| export
def pandas_current_month(df):
    """Restrict a dataframe to rows dated within the current month so far.

    Alternative to ``mtd``: instead of comparing against the month start,
    it builds a daily date range from the first of the month up to now and
    keeps rows whose index value is one of those daily timestamps.

    Args:
        * df (Series, DataFrame): data whose index holds dates.
    Returns:
        * The rows of ``df`` whose index is in that daily range.
    """
    # `dt` / `pd` are the file-level aliases for datetime and pandas.
    now = dt.datetime.now()
    month_days = pd.date_range(dt.date(now.year, now.month, 1), now)
    return df[df.index.isin(month_days)]

In [None]:
#| export
def multi_shift(df, shift=3):
    """Get last N rows relative to another row in dataframe of values, with a sorted index.

    The result holds the original columns followed by ``shift - 1`` lagged
    copies; the copy lagged by ``k`` rows has ``k`` appended to each column
    label (``value`` -> ``value1``, ``value2``, ...).

    Args:
        * df (Series, DataFrame): values to lag; a Series is converted to a
            one-column DataFrame first.
        * shift (int): total number of column groups, including the
            unshifted original.
    Returns:
        * DataFrame with the original and lagged columns side by side.
    """
    if isinstance(df, pd.Series):
        df = pd.DataFrame(df)

    frames = []
    for lag in range(shift):
        lagged = df.shift(lag)
        if lag:
            # Build each label via string formatting so non-string labels
            # (e.g. the integer 0 of an unnamed Series) work as well.
            lagged.columns = [f"{col}{lag}" for col in df.columns]
        frames.append(lagged)
    return pd.concat(frames, axis=1, sort=True)

In [None]:
# Example only (not exported to the module): each row gains the values of
# the previous two rows as extra columns.
df = pd.DataFrame({
    'value': [10,15,13,7,12,6],
    'Date': ['1999-10-15','1999-10-16','1999-10-17','1999-10-18','1999-10-19','1999-10-20']
})
df = df.set_index('Date')
multi_shift(df,3)

Unnamed: 0_level_0,value,value1,value2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1999-10-15,10,,
1999-10-16,15,10.0,
1999-10-17,13,15.0,10.0
1999-10-18,7,13.0,15.0
1999-10-19,12,7.0,13.0
1999-10-20,6,12.0,7.0


In [None]:
#| export
def to_returns(prices, rf=0.):
    """Compute the simple arithmetic returns of a price series.

    Thin wrapper: ``prices`` and ``rf`` are passed straight to
    ``_prepare_returns`` and its result is returned unchanged.
    """
    prepared = _prepare_returns(prices, rf)
    return prepared

In [None]:
#| export
def to_prices(returns, base=1e5):
    """Turn a returns series into price data.

    Args:
        * returns (Series, DataFrame): periodic returns.
        * base (number): starting value the compounded returns are scaled by.
    Returns:
        * ``base + base * stats.compsum(returns)`` computed on a cleaned
          copy of ``returns``.
    """
    infinities = [np.inf, -np.inf]
    # Missing returns count as 0; infinite ones are turned into NaN afterwards.
    cleaned = returns.copy()
    cleaned = cleaned.fillna(0)
    cleaned = cleaned.replace(infinities, float('NaN'))

    growth = stats.compsum(cleaned)
    return base + base * growth

In [None]:
# Mock returns series (one simple return per period) used by the example
# call in the next cell.
data = np.array([0.1, -0.2, 0.25, 0.5, -0.8])
returns = pd.Series(data)

In [None]:
# Example: convert the mock returns into prices starting from a base of 100.
to_prices(returns, base=100)

0    110.0
1     88.0
2    110.0
3    165.0
4     33.0
dtype: float64

In [None]:
#| export
def log_returns(returns, rf=0., nperiods=None):
    """Alias for ``to_log_returns``; all arguments are forwarded unchanged."""
    converted = to_log_returns(returns, rf, nperiods)
    return converted

In [None]:
#| export
def to_log_returns(returns, rf=0., nperiods=None):
    """Convert a returns series to log returns.

    The input is first passed through ``prepare_returns`` together with
    ``rf`` and ``nperiods``; the result is ``log(1 + r)`` with infinite
    values replaced by NaN. If that computation raises, ``0.`` is returned
    instead.
    """
    # NOTE(review): other functions in this file call `_prepare_returns`
    # (leading underscore) - confirm which helper name is the intended one.
    prepared = prepare_returns(returns, rf, nperiods)
    try:
        logged = np.log(prepared + 1)
        return logged.replace([np.inf, -np.inf], float('NaN'))
    except Exception:
        # Best-effort fallback: any failure yields a scalar zero.
        return 0.

In [None]:
#| export
def exponential_stdev(returns, window=30, is_halflife=False):
    """Returns series representing exponential volatility of returns.

    Args:
        * returns (Series, DataFrame): returns, passed through
            ``_prepare_returns`` first.
        * window (int): decay parameter, also used as ``min_periods``.
        * is_halflife (bool): when True ``window`` is interpreted as the
            half-life of the decay, otherwise as its span.
    Returns:
        * Exponentially weighted standard deviation of ``returns``.
    """
    returns = _prepare_returns(returns)
    # pandas accepts exactly one decay parameter; passing both `span` and
    # `halflife` raises a ValueError, so pick the one requested.
    if is_halflife:
        decay = {'halflife': window}
    else:
        decay = {'span': window}
    return returns.ewm(min_periods=window, **decay).std()

In [None]:
#| export
def rebase(prices, base=100.):
    """
    Rebase all series to a given initial base.
    This makes comparing/plotting different series together easier.
    Rows containing missing values are dropped before rebasing.
    Args:
        * prices: Expects a price series/dataframe
        * base (number): starting value for all series.
    Returns:
        * The cleaned prices divided by their first row and scaled by base.
    """
    clean = prices.dropna()
    first = clean.iloc[0]
    return clean / first * base

In [None]:
#| export
def group_returns(returns, groupby, compounded=True):
    """Summarize returns per group.

    group_returns(df, df.index.year)
    group_returns(df, [df.index.year, df.index.month])

    Args:
        * returns (Series, DataFrame): returns to summarize.
        * groupby: any grouper accepted by pandas ``groupby``.
        * compounded (bool): apply ``stats.comp`` to each group when True,
            otherwise sum each group's returns.
    Returns:
        * One aggregated value per group.
    """
    grouped = returns.groupby(groupby)
    if compounded:
        # `stats` is the file-level alias for hypersheets.stats.
        return grouped.apply(stats.comp)
    return grouped.sum()

In [None]:
#| export
def aggregate_returns(returns, period=None, compounded=True):
    """Aggregates returns based on date periods.

    Args:
        * returns (Series, DataFrame): returns with a date-like index.
        * period: None or a string containing 'day' returns the input
            unchanged. Strings containing 'month', 'quarter', 'year'/'eoy'/
            'yoy' (or "A") or 'week' group by that calendar field alone;
            'eow'/"W", 'eom'/"M" and 'eoq'/"Q" group by year plus
            week/month/quarter. Any non-string value is used directly as
            the grouper.
        * compounded (bool): forwarded to ``group_returns``.
    Returns:
        * The aggregated returns, or ``returns`` itself when the period is
          daily, None or an unrecognized string.
    """
    if period is None:
        return returns

    # Custom groupers must be handled before any string matching: `in` and
    # `==` against array-likes are element-wise and fail on truth-testing.
    if not isinstance(period, str):
        return group_returns(returns, period, compounded)

    if 'day' in period:
        return returns

    index = returns.index

    if 'month' in period:
        return group_returns(returns, index.month, compounded=compounded)

    if 'quarter' in period:
        return group_returns(returns, index.quarter, compounded=compounded)

    if period == "A" or any(x in period for x in ['year', 'eoy', 'yoy']):
        return group_returns(returns, index.year, compounded=compounded)

    if 'week' in period:
        return group_returns(returns, _week_of_year(index),
                             compounded=compounded)

    if 'eow' in period or period == "W":
        return group_returns(returns, [index.year, _week_of_year(index)],
                             compounded=compounded)

    if 'eom' in period or period == "M":
        return group_returns(returns, [index.year, index.month],
                             compounded=compounded)

    if 'eoq' in period or period == "Q":
        return group_returns(returns, [index.year, index.quarter],
                             compounded=compounded)

    return returns


def _week_of_year(index):
    """Return the week number of each entry of a date-like index."""
    if hasattr(index, 'isocalendar'):
        # `DatetimeIndex.week` was removed in pandas 2.0; `isocalendar()`
        # is its replacement.
        weeks = index.isocalendar().week.astype('int64').to_numpy()
        return pd.Index(weeks, name=index.name)
    # Index types without `isocalendar` still expose `.week` directly.
    return index.week

In [None]:
#| export
def to_excess_returns(returns, rf, nperiods=None):
    """
    Calculates excess returns by subtracting
    risk-free returns from total returns
    Args:
        * returns (Series, DataFrame): Returns
        * rf (float, Series, DataFrame): Risk-Free rate(s)
        * nperiods (int): Optional. If provided, will convert rf to different
            frequency using deannualize
    Returns:
        * excess_returns (Series, DataFrame): Returns - rf
    """
    # Normalize integer rates (Python or numpy) to a plain float.
    if isinstance(rf, (int, np.integer)):
        rf = float(rf)

    if not isinstance(rf, (float, np.floating)):
        # Series/DataFrame of rates: keep only rows whose index also
        # appears in `returns`.
        rf = rf[rf.index.isin(returns.index)]

    if nperiods is not None:
        # deannualize
        rf = np.power(1 + rf, 1. / nperiods) - 1.

    return returns - rf

In [None]:
#| hide
# Run nbdev's export step for this notebook (presumably writes the
# `#| export` cells to the module named by `default_exp` - verify).
import nbdev; nbdev.nbdev_export()