In [30]:
from copy import deepcopy
import gudhi as gd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gudhi.representations
from datetime import timedelta
from pandas.tseries.offsets import BDay
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy.signal import periodogram
from scipy.fftpack import fft, fftfreq, ifft

In [12]:
colnames = ['Date', 'Open', 'High', 'Low', 'Close']

# Every index file shares the same layout, so the parsing options are
# declared once and reused for each read.
csv_options = dict(
    parse_dates=['Date'],
    index_col='Date',
    names=colnames,
    header=0,
    date_format='mixed',
)

DJIA = pd.read_csv('DJIA.csv', **csv_options)
NASDAQ = pd.read_csv('NASDAQ.csv', **csv_options)
Russel2000 = pd.read_csv('Russel2000.csv', **csv_options)
SP500 = pd.read_csv('S&P500.csv', **csv_options)

# One column of closing prices per index, aligned on the union of the dates
# and ordered chronologically.
close = pd.concat(
    [DJIA['Close'], NASDAQ['Close'], Russel2000['Close'], SP500['Close']],
    axis=1,
    keys=['DJIA', 'NASDAQ', 'Russel2000', 'SP500'],
)
close = close.sort_index()
print(close.head())

               DJIA  NASDAQ  Russel2000   SP500
Date                                           
1987-12-23  2005.64  331.48      120.80  253.16
1987-12-24  1999.67  333.19      121.59  252.02
1987-12-28  1942.97  325.60      119.00  245.57
1987-12-29  1926.89  325.53      118.30  244.59
1987-12-30  1950.10  329.70      119.50  247.86


In [90]:
class FinanceTimeSeries:
    """Price time series with a small topological / spectral analysis pipeline.

    The wrapped DataFrame (``time_series``) is transformed step by step:

    1. ``log_return`` / ``scale`` / ``scale_log_return`` turn prices into
       standardized log returns,
    2. ``compute_persistence_norms_seq`` computes norms of persistence
       landscapes over sliding windows (stored in ``persistence_norms``),
    3. ``avgPSD`` computes the rolling mean power spectral density of those
       norms (stored in ``avg_PSD``).

    Boolean flags record which transformations were already applied so that a
    step is not applied twice.
    """

    def __init__(self, data):
        """Wrap ``data``, a DataFrame with one column per series."""
        self.time_series = data
        self.return_df = False          # True once converted to log returns
        self.scaled = False             # True once standardized
        self.return_scaled = False      # True once scale_log_return has run
        self.persistence_norms = pd.DataFrame()
        self.persistence_computed = False
        self.avg_PSD = pd.DataFrame()
        self.PSD_filter_keep = None     # filter_keep used by the last avgPSD call
        self.PSD_freq_cut = None        # freq_cut used by the last avgPSD call

    def copy(self):
        """Return a deep copy of this object (data and flags)."""
        return deepcopy(self)

    def _target(self, inplace):
        """Return the object a transformation should write to."""
        return self if inplace else self.copy()

    def log_return(self, inplace = False):
        """Replace the series by its log returns ``log(1 + pct_change)``.

        The first row (no previous value) is dropped.

        Returns ``None`` when ``inplace`` is True, otherwise a transformed
        copy. If the data already holds returns, a message is printed, nothing
        is recomputed, and a non-inplace call returns an unchanged copy.
        """
        if self.return_df:
            print("This is already a return DataFrame.")
            return None if inplace else self.copy()

        df = self._target(inplace)
        df.time_series = np.log(df.time_series.pct_change().dropna() + 1)
        df.return_df = True
        return None if inplace else df

    def scale(self, inplace = False):
        """Standardize every column with ``StandardScaler``.

        Returns ``None`` when ``inplace`` is True, otherwise a transformed
        copy. If the data is already scaled, a message is printed, nothing is
        recomputed, and a non-inplace call returns an unchanged copy.
        """
        if self.scaled:
            print("The DataFrame is already scaled.")
            return None if inplace else self.copy()

        df = self._target(inplace)
        scaler = StandardScaler()
        df.time_series = pd.DataFrame(
            scaler.fit_transform(df.time_series),
            columns = df.time_series.columns,
            index = df.time_series.index,
        )
        df.scaled = True
        return None if inplace else df

    def scale_log_return(self, inplace = False):
        """Convert to log returns, then standardize them.

        Returns ``None`` when ``inplace`` is True, otherwise a transformed
        copy. If this was already done, a message is printed and nothing is
        recomputed; a non-inplace call then returns an unchanged copy.
        """
        if self.return_scaled:
            print("The DataFrame is already a scaled log_return.")
            # Return here: there is no working object to transform on this path.
            return None if inplace else self.copy()

        df = self._target(inplace)
        df.log_return(inplace = True)
        df.scale(inplace = True)
        df.return_scaled = True
        return None if inplace else df

    def compute_persistence_norms_seq(self, window_size, p_norms, dimension, scaling = None):
        """Compute persistence-landscape norms over sliding windows.

        For every date ``t`` after the first ``window_size + 1`` rows, the rows
        whose label lies between ``t - BDay(window_size)`` and ``t`` (label
        slice, both ends included) form a point cloud. A Rips complex is built
        on it, the persistence landscape of the ``dimension``-dimensional
        intervals is computed, and its norm is taken for each order in
        ``p_norms``. The window is defined by business-day offsets, not by a
        row count, so dates missing from the index shrink it.

        Parameters
        ----------
        window_size : int
            Window length in business days.
        p_norms : iterable
            Orders passed to ``np.linalg.norm`` (one output column each).
        dimension : int
            Homology dimension whose intervals are used.
        scaling : scaler object or None
            Optional object exposing ``fit_transform`` applied to the norms.

        The result is stored in ``self.persistence_norms``. Requires the data
        to be a scaled log return; otherwise a message is printed.
        """
        if not self.return_scaled:
            print("The TimeSeries is not a scaled log_return.")
            return

        norms_by_date = {}
        for t in self.time_series.index[window_size + 1:]:
            points = self.time_series.loc[t - BDay(window_size): t].to_numpy()
            simplex_tree = gd.RipsComplex(points = points).create_simplex_tree(
                max_dimension = dimension + 1
            )
            # Persistence has to be computed before intervals can be queried;
            # the returned list itself is not needed.
            simplex_tree.persistence()
            intervals = simplex_tree.persistence_intervals_in_dimension(dimension)

            # A new Landscape is fitted for each window so every window gets
            # its own sampling range, exactly as before.
            landscape = gd.representations.Landscape().fit_transform([intervals])[0]
            norms_by_date[t] = [np.linalg.norm(landscape, ord = p) for p in p_norms]

        Norms = pd.DataFrame.from_dict(
            norms_by_date,
            orient = 'index',
            columns = [f"L{p}_norm" for p in p_norms],
        )

        if scaling is not None and not Norms.empty:
            Norms = pd.DataFrame(
                scaling.fit_transform(Norms),
                columns = Norms.columns,
                index = Norms.index,
            )

        self.persistence_norms = Norms
        self.persistence_computed = True

    @staticmethod
    def _mean_psd(values, freq_cut = None, filter_keep = None):
        """Mean periodogram PSD of ``values``, optionally band-limited.

        ``filter_keep == 'low'`` keeps frequencies strictly below ``freq_cut``,
        ``'high'`` keeps those strictly above; anything else keeps all bins.
        Returns NaN when no frequency bin is kept.
        """
        freqs, psd = periodogram(values, scaling = 'density')
        if filter_keep == 'low':
            psd = psd[freqs < freq_cut]
        elif filter_keep == 'high':
            psd = psd[freqs > freq_cut]
        return psd.mean() if psd.size else float('nan')

    def avgPSD(self , window_size, freq_cut = None, filter_keep = None):
        """Compute the rolling mean power spectral density of the norms.

        Parameters
        ----------
        window_size : int
            Number of rows of ``persistence_norms`` per rolling window.
        freq_cut : float or None
            Cut-off frequency; required whenever ``filter_keep`` is given.
        filter_keep : {'low', 'high'} or None
            Which side of ``freq_cut`` to average over; other values average
            over the whole spectrum.

        The result is stored in ``self.avg_PSD`` (columns prefixed with
        ``PSD_``, incomplete windows dropped); the filter settings are stored
        in ``PSD_filter_keep`` and ``PSD_freq_cut``.

        Raises
        ------
        ValueError
            If ``filter_keep`` is given without ``freq_cut``.
        """
        # Validate once up front instead of once per window.
        if filter_keep is not None and freq_cut is None:
            raise ValueError("No frequence cut provided.")
        if self.persistence_norms.empty:
            print("The persistence norms have not been computed.")
            return

        # raw=True hands each window over as a plain ndarray, which is all
        # periodogram needs.
        rolling_psd = self.persistence_norms.rolling(window_size).apply(
            self._mean_psd, raw = True, args = (freq_cut, filter_keep)
        )
        rolling_psd = rolling_psd.dropna()
        rolling_psd.columns = ['PSD_'+ col_name for col_name in self.persistence_norms.columns]

        self.avg_PSD = rolling_psd
        self.PSD_filter_keep = filter_keep
        self.PSD_freq_cut = freq_cut



In [91]:
# Wrap the aligned closing prices; later cells transform this object.
fts = FinanceTimeSeries(data = close)

In [92]:
# Turn the prices into standardized log returns, mutating `fts` in place.
# compute_persistence_norms_seq checks the `return_scaled` flag set here.
# Intended to run once per freshly constructed `fts`.
fts.scale_log_return(inplace = True)

In [93]:
# Norms (orders 1 and 2) of the dimension-1 persistence landscapes over
# 50-business-day windows, rescaled with MinMaxScaler.
fts.compute_persistence_norms_seq(
    window_size = 50,
    p_norms = [1, 2],
    dimension = 1,
    scaling = MinMaxScaler(),
)
print(fts.persistence_norms)

             L1_norm   L2_norm
1988-03-09  0.084502  0.143662
1988-03-10  0.084502  0.143662
1988-03-11  0.091862  0.147609
1988-03-14  0.081787  0.143001
1988-03-15  0.081787  0.143001
...              ...       ...
2016-12-02  0.050529  0.073259
2016-12-05  0.018433  0.044178
2016-12-06  0.044867  0.070679
2016-12-07  0.047041  0.072400
2016-12-08  0.047041  0.072400

[7249 rows x 2 columns]


In [94]:
# Rolling mean power spectral density of each norm column over windows of
# 250 rows, averaging only the frequencies below `freq_cut`.
# NOTE(review): avgPSD calls scipy's periodogram without `fs`; with SciPy's
# documented default (fs=1.0) the returned frequencies do not exceed 0.5, so
# freq_cut = 1 would keep every bin -- confirm the intended cut-off.
fts.avgPSD(250,freq_cut = 1, filter_keep = 'low')
print(fts.avg_PSD)

            PSD_L1_norm  PSD_L2_norm
1989-03-03     0.004125     0.008258
1989-03-06     0.004118     0.008239
1989-03-07     0.004110     0.008220
1989-03-08     0.004098     0.008196
1989-03-09     0.004092     0.008178
...                 ...          ...
2016-12-02     0.005548     0.008378
2016-12-05     0.005187     0.007913
2016-12-06     0.004805     0.007421
2016-12-07     0.004508     0.006970
2016-12-08     0.004208     0.006515

[7000 rows x 2 columns]


In [47]:
# Build the preview from the raw prices rather than from `fts`: `fts` was
# already transformed in place by an earlier cell, so this cell would
# otherwise depend on the order in which cells were executed.
print(FinanceTimeSeries(close).scale_log_return().time_series)

                DJIA    NASDAQ  Russel2000     SP500
Date                                                
1987-12-24 -0.307443  0.334678    0.480984 -0.430986
1987-12-28 -2.714342 -1.646153   -1.700630 -2.348739
1987-12-29 -0.804945 -0.042002   -0.484861 -0.384903
1987-12-30  1.088561  0.867545    0.758970  1.162635
1987-12-31 -0.570217  0.137029    0.570491 -0.309064
...              ...       ...         ...       ...
2016-12-02 -0.133845  0.033835    0.000635  0.008772
2016-12-05  0.193695  0.681368    1.354759  0.493060
2016-12-06  0.143330  0.291513    0.834322  0.278175
2016-12-07  1.403991  0.769186    0.651823  1.144416
2016-12-08  0.281609  0.279899    1.210140  0.166406

[7300 rows x 4 columns]
