Skip to content

Commit

Permalink
Add annualized vol
Browse files Browse the repository at this point in the history
  • Loading branch information
shawnlinxl committed Jul 1, 2020
1 parent a6fe2ed commit b9ed512
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 29 deletions.
45 changes: 45 additions & 0 deletions pyform/returns/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import math
import pandas as pd
from typing import Optional, Union
from pyform.util.freq import calc_samples_per_year


def calc_ann_vol(
    series: Union[pd.DataFrame, pd.Series],
    method: str = "sample",
    samples_per_year: Optional[float] = None,
) -> float:
    """Computes annualized volatility of a time indexed pandas series

    Args:
        series: a time indexed pandas DataFrame or Series of returns. When a
            DataFrame is supplied, only its first column is used.
        method: {'sample', 'population'}. method used to compute volatility
            (standard deviation). Defaults to "sample".
        samples_per_year: Useful when you want to specify how many samples are there
            per year so annualization can be done properly. If None, this will be
            computed by using the series supplied. Defaults to None.

    Returns:
        float: annualized volatility

    Raises:
        ValueError: if method is not 'sample' or 'population'
    """

    # delta degrees of freedom, used for calculating standard deviation:
    # 1 for the sample estimator, 0 for the population estimator
    try:
        ddof = {"sample": 1, "population": 0}[method]
    except KeyError:
        raise ValueError(
            f"method should be one of 'sample' or 'population', received: {method}"
        ) from None

    if samples_per_year is None:
        # Infer sampling frequency from the series' own date range
        samples_per_year = calc_samples_per_year(
            len(series.index), min(series.index), max(series.index)
        )

    # Compute per period standard deviation (first column if a DataFrame)
    returns = series.iloc[:, 0] if isinstance(series, pd.DataFrame) else series
    vol = returns.std(ddof=ddof)

    # Annualize: volatility scales with the square root of sampling frequency
    return vol * math.sqrt(samples_per_year)
96 changes: 73 additions & 23 deletions pyform/returnseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from typing import Optional, Union, Dict
from pyform.timeseries import TimeSeries
from pyform.returns.compound import compound, ret_to_period
from pyform.util.freq import is_lower_freq
from pyform.returns.metrics import calc_ann_vol
from pyform.util.freq import is_lower_freq, calc_samples_per_year


class ReturnSeries(TimeSeries):
Expand Down Expand Up @@ -410,7 +411,7 @@ def get_ann_ret(

return result

def get_ann_vol(
def calc_ann_vol(
self,
freq: Optional[str] = "M",
include_bm: Optional[bool] = True,
Expand Down Expand Up @@ -452,12 +453,6 @@ def get_ann_vol(
# Columns in the returned dataframe
names, ann_vol, start, end = ([] for i in range(4))

# delta degrees of freedom, used for calculate standard deviation
ddof = {"sample": 1, "population": 0}[method]

# datetime representation of number of days in 1 year
one_year = pd.to_timedelta(365.25, unit="D")

run_name = [self.name]
run_data = [self]

Expand All @@ -478,18 +473,10 @@ def get_ann_vol(

# Convert return to desired frequency
ret = series.to_period(freq=freq, method=compound_method)

# Compute the duration of the series in terms of number of years
years = (series.end - series.start) / one_year

# Get number of data points per year
sample_per_year = len(ret.index) / years

# Compute per period standard deviation
vol = ret.iloc[:, 0].std(ddof=ddof)

# Annualize to annual volatility
vol *= math.sqrt(sample_per_year)
samples_per_year = calc_samples_per_year(
len(ret.index), series.start, series.end
)
vol = calc_ann_vol(ret, method, samples_per_year)

names.append(name)
ann_vol.append(vol)
Expand Down Expand Up @@ -630,7 +617,7 @@ def get_sharpe(
ann_excess_ret = exccess_series.get_ann_ret(
method=compound_method, include_bm=False
)["value"][0]
ann_series_vol = series.get_ann_vol(
ann_series_vol = series.calc_ann_vol(
freq=freq, compound_method=compound_method, include_bm=False
)["value"][0]
ratio = ann_excess_ret / ann_series_vol
Expand Down Expand Up @@ -723,8 +710,71 @@ def get_rolling_tot_ret(
self.align_daterange(series)

# compute rolling total return
series_in_freq = series.to_period(freq=freq, method=method)
roll_result = series_in_freq.rolling(window).apply(compound(method))
ret = series.to_period(freq=freq, method=method)
roll_result = ret.rolling(window).apply(compound(method))
roll_result = roll_result.dropna()

# store result in dictionary
result[name] = roll_result

# reset series date range
series.set_daterange(series_start, series_end)

return result

def get_rolling_ann_vol(
self,
window: Optional[int] = 36,
freq: Optional[str] = "M",
method: Optional[str] = "sample",
include_bm: Optional[bool] = True,
compound_method: Optional[str] = "geometric",
) -> Dict[str, pd.DataFrame]:
"""Computes rolling volatility (standard deviation) of the series
Args:
window: the rolling window. Defaults to 36.
freq: Returns are converted to the same frequency before volatility
is compuated. Defaults to "M".
method: {'sample', 'population'}. method used to compute volatility
(standard deviation). Defaults to "sample".
include_bm: whether to compute rolling volatility for
benchmarks as well. Defaults to True.
compound_method: method to use when compounding return.
Defaults to "geometric".
Returns:
Dict[pd.DataFrame]: dictionary of rolling total returns
* key: name of the series
* value: rolling total returns, in a datetime indexed pandas dataframe
"""

# Store result in dictionary
result = dict()

# Columns in the returned dataframe
run_name = [self.name]
run_data = [self]

if include_bm:
run_name += list(self.benchmark.keys())
run_data += list(self.benchmark.values())

for name, series in zip(run_name, run_data):

# keep record of start and so they can be reset later
series_start = series.start
series_end = series.end

# modify series so it's in the same timerange as the main series
self.align_daterange(series)

# compute rolling total return
ret = series.to_period(freq=freq, method=compound_method)
roll_result = ret.rolling(window).apply(
lambda x: calc_ann_vol(x, method=method)
)
roll_result = roll_result.dropna()

# store result in dictionary
Expand Down
40 changes: 40 additions & 0 deletions pyform/util/freq.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,43 @@ def infer_freq(series: pd.DataFrame, use: Optional[int] = 50) -> str:
raise ValueError(f"Multiple series frequency detected: {freq}")

return freq.pop()


def calc_timedelta_in_years(start, end) -> float:
    """Computes timedelta between start and end, in years

    Args:
        start: start date
        end: end date

    Returns:
        float: time delta in number of years
    """

    # Use the astronomical year length (365.25 days) so leap years
    # average out over multi-year spans
    days_per_year = pd.to_timedelta(365.25, unit="D")

    return (end - start) / days_per_year


def calc_samples_per_year(num_samples: int, start, end) -> float:
    """Computes number of data points per year, given time range

    Args:
        num_samples: total number of samples
        start: start date of samples
        end: end date of samples

    Returns:
        float: average number of samples per year
    """

    # Spread the total sample count evenly across the span, in years
    span_in_years = calc_timedelta_in_years(start, end)

    return num_samples / span_in_years
38 changes: 32 additions & 6 deletions tests/unit/test_returnseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def test_annualized_volatility():
returns = ReturnSeries.read_csv("tests/unit/data/twitter_returns.csv")

# No benchmark
ann_vol = returns.get_ann_vol()
ann_vol = returns.calc_ann_vol()
expected_output = pd.DataFrame(
data={
"name": ["TWTR"],
Expand All @@ -247,7 +247,7 @@ def test_annualized_volatility():
assert ann_vol.equals(expected_output)

# daily volatility
ann_vol = returns.get_ann_vol(freq="D", meta=True)
ann_vol = returns.calc_ann_vol(freq="D", meta=True)
expected_output = pd.DataFrame(
data={
"name": ["TWTR"],
Expand All @@ -262,7 +262,7 @@ def test_annualized_volatility():
assert ann_vol.equals(expected_output)

# population standard deviation
ann_vol = returns.get_ann_vol(method="population", meta=True)
ann_vol = returns.calc_ann_vol(method="population", meta=True)
expected_output = pd.DataFrame(
data={
"name": ["TWTR"],
Expand All @@ -278,7 +278,7 @@ def test_annualized_volatility():

# with single benchmark
returns.add_bm(spy)
ann_vol = returns.get_ann_vol()
ann_vol = returns.calc_ann_vol()
expected_output = pd.DataFrame(
data={
"name": ["TWTR", "SPY"],
Expand All @@ -289,7 +289,7 @@ def test_annualized_volatility():
assert ann_vol.equals(expected_output)

# daily volatility
ann_vol = returns.get_ann_vol(freq="D", meta=True)
ann_vol = returns.calc_ann_vol(freq="D", meta=True)
expected_output = pd.DataFrame(
data={
"name": ["TWTR", "SPY"],
Expand All @@ -304,7 +304,7 @@ def test_annualized_volatility():
assert ann_vol.equals(expected_output)

# has benchmark, but include_bm=False
ann_vol = returns.get_ann_vol(include_bm=False)
ann_vol = returns.calc_ann_vol(include_bm=False)
expected_output = pd.DataFrame(
data={
"name": ["TWTR"],
Expand Down Expand Up @@ -408,6 +408,32 @@ def test_rolling_tot_ret():
assert roll_spy["SPY"][0] == 0.1996927920869329


def test_rolling_ann_vol():
    """Rolling annualized volatility: known values with and without benchmarks."""

    def parse_date(text):
        # Helper: ISO date string -> datetime, for index comparisons
        return datetime.datetime.strptime(text, "%Y-%m-%d")

    returns = ReturnSeries.read_csv("tests/unit/data/twitter_returns.csv")

    # Default monthly frequency, no benchmark attached
    roll_twtr = returns.get_rolling_ann_vol()["TWTR"]
    assert roll_twtr.index[0] == parse_date("2016-10-31")
    assert roll_twtr["TWTR"][0] == 0.5791236929456373

    # Daily frequency over a rolling 252 day window
    roll_twtr = returns.get_rolling_ann_vol(window=252, freq="D")["TWTR"]
    assert roll_twtr.index[0] == parse_date("2014-11-06")
    assert roll_twtr["TWTR"][0] == 0.639203890663799

    # After adding a benchmark, both series appear in the result
    returns.add_bm(spy)
    roll_ann_vol = returns.get_rolling_ann_vol()
    roll_twtr = roll_ann_vol["TWTR"]
    roll_spy = roll_ann_vol["SPY"]
    assert roll_twtr.index[0] == parse_date("2016-10-31")
    assert roll_twtr["TWTR"][0] == 0.5791236929456373
    assert roll_spy.index[0] == parse_date("2016-10-31")
    assert roll_spy["SPY"][0] == 0.10935207559750833


def test_libor_fred():

CashSeries.read_fred_libor_1m()

0 comments on commit b9ed512

Please sign in to comment.