In [30]:
from copy import deepcopy
import gudhi as gd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gudhi.representations
from datetime import timedelta
from pandas.tseries.offsets import BDay
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy.signal import periodogram
from scipy.fftpack import fft, fftfreq, ifft

In [12]:
colnames = ['Date', 'Open', 'High', 'Low', 'Close']

# All four index files are read with the same options, so they are declared once.
csv_options = dict(
    parse_dates=['Date'],
    index_col='Date',
    names=colnames,
    header=0,
    date_format='mixed',
)

DJIA = pd.read_csv('DJIA.csv', **csv_options)
NASDAQ = pd.read_csv('NASDAQ.csv', **csv_options)
Russel2000 = pd.read_csv('Russel2000.csv', **csv_options)
SP500 = pd.read_csv('S&P500.csv', **csv_options)

# One column of closing prices per index, aligned on the shared Date index.
close = pd.concat(
    [DJIA['Close'], NASDAQ['Close'], Russel2000['Close'], SP500['Close']],
    axis=1,
    keys=['DJIA', 'NASDAQ', 'Russel2000', 'SP500'],
)
close = close.sort_index()
print(close.head())

               DJIA  NASDAQ  Russel2000   SP500
Date                                           
1987-12-23  2005.64  331.48      120.80  253.16
1987-12-24  1999.67  333.19      121.59  252.02
1987-12-28  1942.97  325.60      119.00  245.57
1987-12-29  1926.89  325.53      118.30  244.59
1987-12-30  1950.10  329.70      119.50  247.86


commenter

In [145]:
class FinanceTimeSeries:
    """Container for a price DataFrame plus the features derived from it.

    The intended pipeline is: log returns -> standard scaling -> rolling
    persistence-landscape norms -> rolling spectral statistics of those norms.
    Every transforming method takes ``inplace``: when True it mutates ``self``
    and returns None, otherwise it works on a deep copy and returns that copy.
    """

    def __init__(self, data):
        """Store ``data`` (kept by reference, not copied) and reset all state flags."""
        self.time_series = data
        # True once time_series holds log returns.
        self.return_df = False
        # True once time_series has been standardised by scale().
        self.scaled = False
        # True once scale_log_return() has completed.
        self.return_scaled = False
        # Output of compute_persistence_norms_seq(): one column per requested norm.
        self.persistence_norms = pd.DataFrame()
        self.persistence_computed = False
        # Output and parameters of avgPSD().
        self.avg_PSD = pd.DataFrame()
        self.PSD_filter_keep = None
        self.PSD_freq_cut = None
        # Scaler object passed to compute_persistence_norms_seq(), if any.
        self.Lp_norms_scaling = None
        # Output and parameters of std_freq_filter().
        self.norms_std = pd.DataFrame()
        self.std_filter_keep = None
        self.std_freq_cut = None

    def copy(self):
        """Return a deep copy of this object."""
        return deepcopy(self)

    def _target(self, inplace):
        """Return the object a method should mutate: ``self`` or a deep copy."""
        return self if inplace else self.copy()

    @staticmethod
    def _check_filter_args(freq_cut, filter_keep):
        """Reject a filter request that comes without a cut-off frequency."""
        if filter_keep is not None and freq_cut is None:
            raise ValueError(
                "No frequency cut provided: freq_cut is required when filter_keep is set."
            )

    def log_return(self, inplace=False):
        """Replace ``time_series`` by its log returns, ``log(1 + pct_change)``.

        The first row (which has no previous value) and any other row containing
        NaN are dropped. Nothing is done if the data is already flagged as returns.

        Args:
            inplace (bool): mutate ``self`` instead of working on a copy.

        Returns:
            FinanceTimeSeries | None: the transformed copy when ``inplace`` is
            False; None when ``inplace`` is True or the data was already returns.
        """
        if self.return_df:
            print("This is already a return DataFrame.")
            return None

        fts = self._target(inplace)
        fts.time_series = np.log(fts.time_series.pct_change().dropna() + 1)
        fts.return_df = True
        return None if inplace else fts

    def scale(self, inplace=False):
        """Standardise every column of ``time_series`` with ``StandardScaler``.

        Nothing is done if the data is already flagged as scaled.

        Args:
            inplace (bool): mutate ``self`` instead of working on a copy.

        Returns:
            FinanceTimeSeries | None: the scaled copy when ``inplace`` is False;
            None when ``inplace`` is True or the data was already scaled.
        """
        if self.scaled:
            print("The DataFrame is already scaled.")
            return None

        fts = self._target(inplace)
        frame = fts.time_series
        # fit_transform returns a bare array; restore the original labels.
        fts.time_series = pd.DataFrame(
            StandardScaler().fit_transform(frame),
            columns=frame.columns,
            index=frame.index,
        )
        fts.scaled = True
        return None if inplace else fts

    def scale_log_return(self, inplace=False):
        """Apply ``log_return`` then ``scale`` and flag the result as ready.

        Each step is skipped (with its own message) if its flag is already set.

        Args:
            inplace (bool): mutate ``self`` instead of working on a copy.

        Returns:
            FinanceTimeSeries | None: the transformed copy when ``inplace`` is
            False; None when ``inplace`` is True or the object was already a
            scaled log return.
        """
        if self.return_scaled:
            print("The DataFrame is already a scaled log_return.")
            # Return here: there is no target object to transform in this case.
            return None

        fts = self._target(inplace)
        fts.log_return(inplace=True)
        fts.scale(inplace=True)
        fts.return_scaled = True
        return None if inplace else fts

    def compute_persistence_norms_seq(self, window_size, p_norms, dimension, scaling=None, inplace=False):
        """Compute norms of the persistence landscape over a sliding window.

        For every timestamp ``t`` after the first ``window_size + 1`` rows, the
        rows labelled between ``t - BDay(window_size)`` and ``t`` (both ends
        included) form a point cloud. A Rips complex is built on it, and the
        landscape of its ``dimension``-dimensional persistence intervals is
        reduced to one value per entry of ``p_norms``.

        Requires ``scale_log_return`` to have been run; otherwise a message is
        printed and nothing is computed.

        Args:
            window_size (int): number of business-day offsets the window reaches
                back. The number of rows in a window follows the index and is
                not guaranteed to equal ``window_size``.
            p_norms (iterable): orders passed as ``ord`` to ``np.linalg.norm``.
            dimension (int): homology dimension whose intervals are used; the
                simplex tree is expanded up to ``dimension + 1``.
            scaling: optional object with a ``fit_transform`` method (for example
                a scikit-learn scaler) applied to the table of norms. It is
                fitted by this call and stored in ``Lp_norms_scaling``.
            inplace (bool): mutate ``self`` instead of working on a copy.

        Returns:
            FinanceTimeSeries | None: the copy holding ``persistence_norms`` when
            ``inplace`` is False; None otherwise.
        """
        if not self.return_scaled:
            print("The TimeSeries is not a scaled log_return.")
            return None

        fts = self._target(inplace)
        series = fts.time_series

        norms_by_date = {}
        for t in series.index[window_size + 1:]:
            points = series.loc[t - BDay(window_size): t].to_numpy()
            simplex_tree = gd.RipsComplex(points=points).create_simplex_tree(
                max_dimension=dimension + 1
            )
            # Persistence has to be computed before intervals can be queried.
            simplex_tree.persistence()

            # A fresh transformer per window: it is fitted on this diagram only.
            landscape = gd.representations.Landscape().fit_transform(
                [simplex_tree.persistence_intervals_in_dimension(dimension)]
            )
            norms_by_date[t] = [np.linalg.norm(landscape[0], ord=p) for p in p_norms]

        norms = pd.DataFrame.from_dict(
            norms_by_date,
            orient='index',
            columns=[f"L{p}_norm" for p in p_norms],
        )

        if scaling is not None:
            norms = pd.DataFrame(
                scaling.fit_transform(norms),
                columns=norms.columns,
                index=norms.index,
            )

        fts.persistence_norms = norms
        fts.persistence_computed = True
        fts.Lp_norms_scaling = scaling
        return None if inplace else fts

    @staticmethod
    def _mean_psd(values, freq_cut, filter_keep):
        """Mean periodogram density of one window, optionally on one side of ``freq_cut``."""
        freqs, psd = periodogram(values, scaling='density')
        if filter_keep == 'low':
            psd = psd[freqs < freq_cut]
        elif filter_keep == 'high':
            psd = psd[freqs > freq_cut]
        # An empty selection yields NaN, which the caller drops.
        return psd.mean() if psd.size else np.nan

    def avgPSD(self, window_size, freq_cut=None, filter_keep=None, inplace=False):
        """Rolling average power spectral density of each persistence-norm column.

        For every window of ``window_size`` rows of ``persistence_norms`` the
        periodogram (``scaling='density'``, default sampling frequency) is
        computed and its mean stored in ``avg_PSD``. Columns are named
        ``'PSD_' + <norm column>`` and rows that evaluate to NaN are dropped.

        Args:
            window_size (int): number of rows per rolling window.
            freq_cut (float | None): cut-off frequency; required when
                ``filter_keep`` is set.
            filter_keep (str | None): ``'low'`` keeps bins strictly below
                ``freq_cut``, ``'high'`` keeps bins strictly above it; any other
                value averages the full spectrum.
            inplace (bool): mutate ``self`` instead of working on a copy.

        Returns:
            FinanceTimeSeries | None: the copy holding ``avg_PSD`` when
            ``inplace`` is False; None otherwise.

        Raises:
            ValueError: if ``filter_keep`` is given without ``freq_cut``.
        """
        self._check_filter_args(freq_cut, filter_keep)
        fts = self._target(inplace)

        # raw=True hands each window over as an ndarray, avoiding per-window Series.
        rolled = fts.persistence_norms.rolling(window_size).apply(
            lambda window: self._mean_psd(window, freq_cut, filter_keep),
            raw=True,
        )
        rolled = rolled.dropna()
        rolled.columns = ['PSD_' + col_name for col_name in fts.persistence_norms.columns]

        fts.avg_PSD = rolled
        fts.PSD_filter_keep = filter_keep
        fts.PSD_freq_cut = freq_cut

        if filter_keep is None:
            print("No filter selected.")

        return None if inplace else fts

    @staticmethod
    def _filtered_std(values, freq_cut, filter_keep, spacing):
        """Standard deviation of one window, optionally after an FFT brick-wall filter."""
        if filter_keep not in ('low', 'high'):
            # NOTE: sample std (ddof=1) here, while the filtered branches below
            # use the population std (ddof=0). Kept as is to preserve results.
            return np.std(values, ddof=1)

        spectrum = fft(values)
        freqs = np.abs(fftfreq(len(spectrum), spacing))
        if filter_keep == 'low':
            spectrum[freqs > freq_cut] = 0
        else:
            spectrum[freqs < freq_cut] = 0
        return np.real(ifft(spectrum)).std()

    def std_freq_filter(self, window_size, freq_cut=None, filter_keep=None, spacing=1, inplace=False):
        """Rolling standard deviation of each persistence-norm column, optionally filtered.

        Each window of ``window_size`` rows of ``persistence_norms`` is optionally
        low- or high-pass filtered by zeroing FFT coefficients, and the standard
        deviation of the result is stored in ``norms_std``. Columns are named
        ``'std_' + <norm column>`` and rows that evaluate to NaN are dropped.

        Args:
            window_size (int): number of rows per rolling window.
            freq_cut (float | None): cut-off frequency, in the units implied by
                ``spacing``; required when ``filter_keep`` is set.
            filter_keep (str | None): ``'low'`` zeroes coefficients whose absolute
                frequency is above ``freq_cut``, ``'high'`` zeroes those below
                it; any other value applies no filter.
            spacing (float): sample spacing passed to ``fftfreq``.
            inplace (bool): mutate ``self`` instead of working on a copy.

        Returns:
            FinanceTimeSeries | None: the copy holding ``norms_std`` when
            ``inplace`` is False; None otherwise.

        Raises:
            ValueError: if ``filter_keep`` is given without ``freq_cut``.
        """
        self._check_filter_args(freq_cut, filter_keep)
        fts = self._target(inplace)

        rolled = fts.persistence_norms.rolling(window_size).apply(
            lambda window: self._filtered_std(window, freq_cut, filter_keep, spacing),
            raw=True,
        )
        rolled = rolled.dropna()
        rolled.columns = ['std_' + col_name for col_name in fts.persistence_norms.columns]

        fts.norms_std = rolled
        fts.std_freq_cut = freq_cut
        fts.std_filter_keep = filter_keep

        if filter_keep is None:
            print("No filter selected.")

        return None if inplace else fts




In [146]:
# Wrap the aligned closing prices; the object keeps a reference to `close`.
fts = FinanceTimeSeries(data=close)

In [147]:
# Convert prices to standardised log returns, mutating `fts` itself.
fts.scale_log_return(inplace=True)

In [148]:
# Windows reach back 50 business days; L1 and L2 norms of the dimension-1
# persistence landscape are min-max scaled and stored on `fts`.
fts.compute_persistence_norms_seq(
    window_size=50,
    p_norms=[1, 2],
    dimension=1,
    scaling=MinMaxScaler(),
    inplace=True,
)
fts.persistence_norms

Unnamed: 0,L1_norm,L2_norm
1988-03-09,0.084502,0.143662
1988-03-10,0.084502,0.143662
1988-03-11,0.091862,0.147609
1988-03-14,0.081787,0.143001
1988-03-15,0.081787,0.143001
...,...,...
2016-12-02,0.050529,0.073259
2016-12-05,0.018433,0.044178
2016-12-06,0.044867,0.070679
2016-12-07,0.047041,0.072400


In [149]:
# Rolling 250-row average PSD of the norms, computed on a copy of `fts`.
# NOTE(review): the periodogram is called with its default sampling frequency,
# so its bins should not exceed 0.5; freq_cut=1 with 'low' would then keep
# every bin -- confirm the intended cut-off.
fts.avgPSD(window_size=250, freq_cut=1, filter_keep='low').avg_PSD

Unnamed: 0,PSD_L1_norm,PSD_L2_norm
1989-03-03,0.004125,0.008258
1989-03-06,0.004118,0.008239
1989-03-07,0.004110,0.008220
1989-03-08,0.004098,0.008196
1989-03-09,0.004092,0.008178
...,...,...
2016-12-02,0.005548,0.008378
2016-12-05,0.005187,0.007913
2016-12-06,0.004805,0.007421
2016-12-07,0.004508,0.006970


In [159]:
# Rolling 250-row std of the low-pass filtered norms, computed on a copy of `fts`.
# With spacing=0.01 the FFT frequencies are expressed in units of 1/spacing.
fts.std_freq_filter(
    window_size=250,
    freq_cut=10,
    filter_keep='low',
    spacing=0.01,
).norms_std

Unnamed: 0,std_L1_norm,std_L2_norm
1989-03-03,0.043278,0.061696
1989-03-06,0.043237,0.061614
1989-03-07,0.043192,0.061538
1989-03-08,0.043145,0.061469
1989-03-09,0.043107,0.061410
...,...,...
2016-12-02,0.049295,0.061257
2016-12-05,0.046990,0.058726
2016-12-06,0.045075,0.056498
2016-12-07,0.043579,0.054619
