Skip to content

Commit

Permalink
Add annualized vol
Browse files Browse the repository at this point in the history
  • Loading branch information
shawnlinxl committed Jul 1, 2020
1 parent a6fe2ed commit b9ed512
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 29 deletions.
45 changes: 45 additions & 0 deletions pyform/returns/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import math
import pandas as pd
from typing import Optional, Union
from pyform.util.freq import calc_samples_per_year


def calc_ann_vol(
    series: Union[pd.DataFrame, pd.Series],
    method: str = "sample",
    samples_per_year: Optional[float] = None,
) -> float:
    """Computes annualized volatility of a time indexed pandas series

    Args:
        series: a time indexed pandas DataFrame or Series of returns. When a
            DataFrame is supplied, only its first column is used.
        method: {'sample', 'population'}. method used to compute volatility
            (standard deviation). Defaults to "sample".
        samples_per_year: Useful when you want to specify how many samples are there
            per year so annualization can be done properly. If None, this will be
            computed by using the series supplied. Defaults to None.

    Returns:
        float: annualized volatility

    Raises:
        ValueError: if method is not 'sample' or 'population'
    """

    # delta degrees of freedom, used for calculating standard deviation:
    # 1 for the sample estimator, 0 for the population estimator
    try:
        ddof = {"sample": 1, "population": 0}[method]
    except KeyError:
        raise ValueError(
            f"method should be one of 'sample' or 'population', received: {method}"
        ) from None

    if samples_per_year is None:
        # Infer sampling frequency from the series' own date range
        samples_per_year = calc_samples_per_year(
            len(series.index), min(series.index), max(series.index)
        )

    # Compute per period standard deviation (first column if a DataFrame)
    returns = series.iloc[:, 0] if isinstance(series, pd.DataFrame) else series
    vol = returns.std(ddof=ddof)

    # Annualize: volatility scales with the square root of sampling frequency
    return vol * math.sqrt(samples_per_year)
96 changes: 73 additions & 23 deletions pyform/returnseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from typing import Optional, Union, Dict
from pyform.timeseries import TimeSeries
from pyform.returns.compound import compound, ret_to_period
from pyform.util.freq import is_lower_freq
from pyform.returns.metrics import calc_ann_vol
from pyform.util.freq import is_lower_freq, calc_samples_per_year


class ReturnSeries(TimeSeries):
Expand Down Expand Up @@ -410,7 +411,7 @@ def get_ann_ret(

return result

def get_ann_vol(
def calc_ann_vol(
self,
freq: Optional[str] = "M",
include_bm: Optional[bool] = True,
Expand Down Expand Up @@ -452,12 +453,6 @@ def get_ann_vol(
# Columns in the returned dataframe
names, ann_vol, start, end = ([] for i in range(4))

# delta degrees of freedom, used for calculate standard deviation
ddof = {"sample": 1, "population": 0}[method]

# datetime representation of number of days in 1 year
one_year = pd.to_timedelta(365.25, unit="D")

run_name = [self.name]
run_data = [self]

Expand All @@ -478,18 +473,10 @@ def get_ann_vol(

# Convert return to desired frequency
ret = series.to_period(freq=freq, method=compound_method)

# Compute the duration of the series in terms of number of years
years = (series.end - series.start) / one_year

# Get number of data points per year
sample_per_year = len(ret.index) / years

# Compute per period standard deviation
vol = ret.iloc[:, 0].std(ddof=ddof)

# Annualize to annual volatility
vol *= math.sqrt(sample_per_year)
samples_per_year = calc_samples_per_year(
len(ret.index), series.start, series.end
)
vol = calc_ann_vol(ret, method, samples_per_year)

names.append(name)
ann_vol.append(vol)
Expand Down Expand Up @@ -630,7 +617,7 @@ def get_sharpe(
ann_excess_ret = exccess_series.get_ann_ret(
method=compound_method, include_bm=False
)["value"][0]
ann_series_vol = series.get_ann_vol(
ann_series_vol = series.calc_ann_vol(
freq=freq, compound_method=compound_method, include_bm=False
)["value"][0]
ratio = ann_excess_ret / ann_series_vol
Expand Down Expand Up @@ -723,8 +710,71 @@ def get_rolling_tot_ret(
self.align_daterange(series)

# compute rolling total return
series_in_freq = series.to_period(freq=freq, method=method)
roll_result = series_in_freq.rolling(window).apply(compound(method))
ret = series.to_period(freq=freq, method=method)
roll_result = ret.rolling(window).apply(compound(method))
roll_result = roll_result.dropna()

# store result in dictionary
result[name] = roll_result

# reset series date range
series.set_daterange(series_start, series_end)

return result

def get_rolling_ann_vol(
self,
window: Optional[int] = 36,
freq: Optional[str] = "M",
method: Optional[str] = "sample",
include_bm: Optional[bool] = True,
compound_method: Optional[str] = "geometric",
) -> Dict[str, pd.DataFrame]:
"""Computes rolling volatility (standard deviation) of the series
Args:
window: the rolling window. Defaults to 36.
freq: Returns are converted to the same frequency before volatility
is compuated. Defaults to "M".
method: {'sample', 'population'}. method used to compute volatility
(standard deviation). Defaults to "sample".
include_bm: whether to compute rolling volatility for
benchmarks as well. Defaults to True.
compound_method: method to use when compounding return.
Defaults to "geometric".
Returns:
Dict[pd.DataFrame]: dictionary of rolling total returns
* key: name of the series
* value: rolling total returns, in a datetime indexed pandas dataframe
"""

# Store result in dictionary
result = dict()

# Columns in the returned dataframe
run_name = [self.name]
run_data = [self]

if include_bm:
run_name += list(self.benchmark.keys())
run_data += list(self.benchmark.values())

for name, series in zip(run_name, run_data):

# keep record of start and so they can be reset later
series_start = series.start
series_end = series.end

# modify series so it's in the same timerange as the main series
self.align_daterange(series)

# compute rolling total return
ret = series.to_period(freq=freq, method=compound_method)
roll_result = ret.rolling(window).apply(
lambda x: calc_ann_vol(x, method=method)
)
roll_result = roll_result.dropna()

# store result in dictionary
Expand Down
40 changes: 40 additions & 0 deletions pyform/util/freq.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,43 @@ def infer_freq(series: pd.DataFrame, use: Optional[int] = 50) -> str:
raise ValueError(f"Multiple series frequency detected: {freq}")

return freq.pop()


def calc_timedelta_in_years(start, end) -> float:
    """Computes timedelta between start and end, in years

    Args:
        start: start date
        end: end date

    Returns:
        float: time delta in number of years
    """

    # Use the astronomical year length (365.25 days) so leap years
    # average out over multi-year spans
    days_per_year = pd.to_timedelta(365.25, unit="D")

    return (end - start) / days_per_year


def calc_samples_per_year(num_samples: int, start, end) -> float:
    """Computes number of data points per year, given time range

    Args:
        num_samples: total number of samples
        start: start date of samples
        end: end date of samples

    Returns:
        float: average number of samples per year
    """

    # Spread the total sample count evenly across the span, in years
    span_in_years = calc_timedelta_in_years(start, end)

    return num_samples / span_in_years
38 changes: 32 additions & 6 deletions tests/unit/test_returnseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def test_annualized_volatility():
returns = ReturnSeries.read_csv("tests/unit/data/twitter_returns.csv")

# No benchmark
ann_vol = returns.get_ann_vol()
ann_vol = returns.calc_ann_vol()
expected_output = pd.DataFrame(
data={
"name": ["TWTR"],
Expand All @@ -247,7 +247,7 @@ def test_annualized_volatility():
assert ann_vol.equals(expected_output)

# daily volatility
ann_vol = returns.get_ann_vol(freq="D", meta=True)
ann_vol = returns.calc_ann_vol(freq="D", meta=True)
expected_output = pd.DataFrame(
data={
"name": ["TWTR"],
Expand All @@ -262,7 +262,7 @@ def test_annualized_volatility():
assert ann_vol.equals(expected_output)

# population standard deviation
ann_vol = returns.get_ann_vol(method="population", meta=True)
ann_vol = returns.calc_ann_vol(method="population", meta=True)
expected_output = pd.DataFrame(
data={
"name": ["TWTR"],
Expand All @@ -278,7 +278,7 @@ def test_annualized_volatility():

# with single benchmark
returns.add_bm(spy)
ann_vol = returns.get_ann_vol()
ann_vol = returns.calc_ann_vol()
expected_output = pd.DataFrame(
data={
"name": ["TWTR", "SPY"],
Expand All @@ -289,7 +289,7 @@ def test_annualized_volatility():
assert ann_vol.equals(expected_output)

# daily volatility
ann_vol = returns.get_ann_vol(freq="D", meta=True)
ann_vol = returns.calc_ann_vol(freq="D", meta=True)
expected_output = pd.DataFrame(
data={
"name": ["TWTR", "SPY"],
Expand All @@ -304,7 +304,7 @@ def test_annualized_volatility():
assert ann_vol.equals(expected_output)

# has benchmark, but include_bm=False
ann_vol = returns.get_ann_vol(include_bm=False)
ann_vol = returns.calc_ann_vol(include_bm=False)
expected_output = pd.DataFrame(
data={
"name": ["TWTR"],
Expand Down Expand Up @@ -408,6 +408,32 @@ def test_rolling_tot_ret():
assert roll_spy["SPY"][0] == 0.1996927920869329


def test_rolling_ann_vol():
    """Rolling annualized volatility: known values with and without benchmarks."""

    def parse_date(text):
        # Helper: ISO date string -> datetime, for index comparisons
        return datetime.datetime.strptime(text, "%Y-%m-%d")

    returns = ReturnSeries.read_csv("tests/unit/data/twitter_returns.csv")

    # Default monthly frequency, no benchmark attached
    roll_twtr = returns.get_rolling_ann_vol()["TWTR"]
    assert roll_twtr.index[0] == parse_date("2016-10-31")
    assert roll_twtr["TWTR"][0] == 0.5791236929456373

    # Daily frequency over a rolling 252 day window
    roll_twtr = returns.get_rolling_ann_vol(window=252, freq="D")["TWTR"]
    assert roll_twtr.index[0] == parse_date("2014-11-06")
    assert roll_twtr["TWTR"][0] == 0.639203890663799

    # After adding a benchmark, both series appear in the result
    returns.add_bm(spy)
    roll_ann_vol = returns.get_rolling_ann_vol()
    roll_twtr = roll_ann_vol["TWTR"]
    roll_spy = roll_ann_vol["SPY"]
    assert roll_twtr.index[0] == parse_date("2016-10-31")
    assert roll_twtr["TWTR"][0] == 0.5791236929456373
    assert roll_spy.index[0] == parse_date("2016-10-31")
    assert roll_spy["SPY"][0] == 0.10935207559750833


def test_libor_fred():

CashSeries.read_fred_libor_1m()

0 comments on commit b9ed512

Please sign in to comment.