#  Financial time series analysis

In [None]:
#%matplotlib notebook
%matplotlib inline

In [None]:
import sys
import csv

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import scipy.sparse as ss
import scipy.stats as st

sys.path.append('../src/')
from cluster import Cluster
import signet_utils as ut
from signet_utils import  objscore
np.set_printoptions(2)

In [None]:
import matplotlib
from mpl_toolkits.axes_grid1 import AxesGrid

def shiftedColorMap(cmap, start=0, midpoint=0.5, stop=1.0, name='shiftedcmap'):
    '''
    Build a copy of `cmap` whose "center" is moved to `midpoint`, register
    it with matplotlib under `name`, and return it. Useful for data with a
    negative min and positive max when the middle of the colormap's dynamic
    range should sit at zero.

    Input
    -----
      cmap : The matplotlib colormap to be altered
      start : Offset from lowest point in the colormap's range.
          Defaults to 0.0 (no lower offset). Should be between
          0.0 and `midpoint`.
      midpoint : The new center of the colormap. Defaults to
          0.5 (no shift). Should be between 0.0 and 1.0. In
          general, this should be  1 - vmax / (vmax + abs(vmin))
          For example if your data range from -15.0 to +5.0 and
          you want the center of the colormap at 0.0, `midpoint`
          should be set to  1 - 5/(5 + 15)  or 0.75
      stop : Offset from highest point in the colormap's range.
          Defaults to 1.0 (no upper offset). Should be between
          `midpoint` and 1.0.
      name : Name given to the new colormap and used to register it.

    Output
    ------
      The new `LinearSegmentedColormap`.
    '''
    cdict = {
        'red': [],
        'green': [],
        'blue': [],
        'alpha': []
    }

    # regular index: 257 positions at which the source colormap is sampled
    reg_index = np.linspace(start, stop, 257)

    # shifted index: where those samples are placed in the new colormap,
    # 128 points below `midpoint` and 129 from `midpoint` up to 1.0
    shift_index = np.hstack([
        np.linspace(0.0, midpoint, 128, endpoint=False),
        np.linspace(midpoint, 1.0, 129, endpoint=True)
    ])

    for ri, si in zip(reg_index, shift_index):
        r, g, b, a = cmap(ri)

        cdict['red'].append((si, r, r))
        cdict['green'].append((si, g, g))
        cdict['blue'].append((si, b, b))
        cdict['alpha'].append((si, a, a))

    newcmap = matplotlib.colors.LinearSegmentedColormap(name, cdict)

    # `plt.register_cmap` is deprecated and no longer exists in recent
    # matplotlib releases; prefer the colormap registry when it is available.
    # force=True lets the cell be re-run without failing on the existing name.
    registry = getattr(matplotlib, 'colormaps', None)
    register = getattr(registry, 'register', None)
    if register is not None:
        register(newcmap, force=True)
    else:
        plt.register_cmap(cmap=newcmap)

    return newcmap

# Shifted colormap based on `seismic` (useful to plot correlation matrices).
# NOTE(review): start=-0.4 is outside the 0.0..midpoint range that the
# shiftedColorMap docstring asks for — confirm this is intentional.
s_cmap = shiftedColorMap(plt.cm.seismic, start= -0.4, midpoint=0., stop=1.0, name='shiftedcmap')
# Alternative without a lower offset:
#s_cmap = shiftedColorMap(plt.cm.seismic,  midpoint=0., name='shiftedcmap')

## Import data

In [None]:
# Load the trading days and the price table as numpy arrays.
days = np.genfromtxt(
    '../data/SP1500/Days_SP1500_20030101_20150415.csv',
    delimiter=' ',
)
# The price table is transposed right after reading; the later cells work
# row-wise on it (one row per company, per the zero-entry filter below).
prices = np.genfromtxt(
    '../data/SP1500/Prices_SP1500_20030101_20150415.csv',
    delimiter=',',
).T

In [None]:
# python lists, sectors
# newline='' is what the csv module asks for when it is handed a file object.
with open('../data/SP1500/Sectors_SP1500_20030101_20150415.csv', 'r', newline='') as f:
    sectors = list(csv.reader(f))

# Distinct values of field 3 of every row. They are sorted because the
# iteration order of a set of strings changes between interpreter runs, and
# later cells index this list by position (all_sectors[i], all_sectors.index).
# NOTE(review): any list written by hand to parallel all_sectors (e.g.
# all_sectors_short) presumably has to follow this same order — confirm.
all_sectors = sorted({row[3] for row in sectors})
all_sectors.remove('SPY')

In [None]:
# Display the sector names ('SPY' was removed when the list was built).
all_sectors

In [None]:
# Distinct values found in field 0 of the sector rows, shown as cell output.
all_d = list({row[0] for row in sectors})
all_d

In [None]:
# Hand-written short names for the ten sectors.
all_sectors_short = [
    'Health', 'Telecom', 'Mat', 'C_Staples', 'Financials',
    'Utilities', 'Energy', 'Indust', 'C_Discret', 'IT',
]

In [None]:
# Sizes of the three raw inputs, one per line: days, sector rows, prices.
print(days.shape, len(sectors), prices.shape, sep='\n')

In [None]:
# Distinct (field 0, field 3) pairs of the sector rows; rebinds all_d.
all_d = list({(row[0], row[3]) for row in sectors})

In [None]:
# Display the current contents of all_d.
all_d

## Filter out companies with zero entries

In [None]:
# Keep only the companies (rows of `prices`) whose series never hits zero.
n_rows = prices.shape[0]
idx = np.arange(n_rows)
has_zero = (prices == 0).any(axis=1)
non_zero = np.logical_not(has_zero)
non_zero_idx = idx[non_zero]

In [None]:
# Apply the zero-entry filter to both the price rows and the sector rows.
prices_ = np.take(prices, non_zero_idx, axis=0)
sectors_ = [sectors[row] for row in non_zero_idx]

In [None]:
# Same size report as before, now for the filtered containers.
for size in (days.shape, len(sectors_), prices_.shape):
    print(size)

In [None]:
# Excess returns: log-returns of every row minus the log-returns of row 0
# (presumably the SPY market index — the next cell drops that row).
log_prices = np.log(prices_)
returns = log_prices[:, 1:] - log_prices[:, :-1]
market_returns = returns[0]
ex_rets = returns - market_returns

print(returns)

In [None]:
# remove SPY index
# Row 0 is dropped from every container (presumably the SPY row — confirm);
# ex_rets_ additionally loses its first return column.
ex_rets_ = ex_rets[1:, 1:]
# Rebuilt from the unfiltered inputs instead of slicing sectors_ / prices_ in
# place, so re-running this cell does not strip one more company each time.
sectors_ = [sectors[i] for i in non_zero_idx[1:]]
prices_ = prices[non_zero_idx[1:]]

In [None]:
# correlation matrix: np.corrcoef treats every row of ex_rets_ as one
# variable, so the result has one row/column per remaining company.
corrs = np.corrcoef(ex_rets_)

## Clustering given by sector assignment

In [None]:
# Stack the price rows sector by sector, following the order of all_sectors.
# Looping over all_sectors itself (rather than a hard-coded range of 10)
# keeps the cell valid if the number of sectors changes, and collecting the
# per-sector blocks first avoids both the dummy zero row and re-copying the
# growing array on every iteration.
sector_blocks = []
for sector in all_sectors:
    idx_i = [j for j, row in enumerate(sectors_) if row[3] == sector]
    sector_blocks.append(prices_[idx_i])

prices_sorted = np.vstack(sector_blocks)

In [None]:
# Same pipeline as for the unsorted prices: log-prices, differences along
# the last axis, subtraction of the first row of `returns`, and the row-wise
# correlation matrix.
log_prices_s = np.log(prices_sorted)
returns_s = np.diff(log_prices_s, axis=-1)
ex_rets_s = np.subtract(returns_s, returns[0])
corr_sorted = np.corrcoef(ex_rets_s)

## Clustering the fully connected graph

In [None]:
# Load the two sparse matrices stored as .npz files (presumably the positive
# and negative parts of the signed adjacency matrix — confirm).
A_p, A_n = map(ss.load_npz, (
    '../data/SP1500/adjacency_plus_cc.npz',
    '../data/SP1500/adjacency_minus_cc.npz',
))

In [None]:
# Labels stored in sector_labels.npy (presumably one sector id per company,
# in the same order as the adjacency matrices — TODO confirm).
labels = np.load('../data/SP1500/sector_labels.npy')

In [None]:
# Project Cluster object built from the (A_p, A_n) pair; the plotting cell
# reads m.p and m.n from it.
m = Cluster((A_p, A_n))

In [None]:
# Passed as the second argument to ut.sizeorder in the plotting cell
# (presumably the number of clusters — confirm).
k=10

### Sort correlation matrix

In [None]:
def enspace(s):
    """Return the short display name of sector `s`.

    Five long sector names are mapped to a shorter form; any other value is
    handed back unchanged.
    """
    renames = (
        ('Consumer_Discretionary', 'Discretionary'),
        ('Health_Care', 'Healthcare'),
        ('Telecommunications_Services', 'Telecoms'),
        ('Information_Technology', 'IT'),
        ('Consumer_Staples', 'Staples'),
    )
    for long_name, short_name in renames:
        if s == long_name:
            return short_name
    return s

In [None]:
# Peek at the first remaining row of sector metadata.
print(sectors_[0])

In [None]:
# Predicted cluster id per company, as saved by a training run.
data_pred = np.load('../logs/SP1500/200_10_90_10_1000/04-22-22:49:19/SSSNET_L_pred_latest3.npy') # all_ARI = 0.707
preds = list(map(int, data_pred))

# Overlay the distribution of predicted cluster ids and of the sector labels.
plt.hist(data_pred, alpha=0.5, label='predicted')
plt.hist(labels, alpha=0.5, label='sector labels')
plt.legend()
plt.show()

# Ordering of the companies computed by the project helper.
x_ = ut.sizeorder(preds, k, m.p, m.n, True)
plt.rcParams.update({'font.size': 15})

# Half-way positions between neighbouring points whose predicted cluster
# differs; these become the vertical separators.
ticks = [i + 0.5 for i in range(len(x_) - 1) if preds[x_[i]] != preds[x_[i + 1]]]

# Sector (field 3) of every company in plotting order, looked up once and
# reused for both the y values and the colours.
sector_names = [row[3] for row in np.array(sectors_)[x_]]

# One '|' marker per company: x is the position in the ordering, y is the
# shortened sector name, colour is the index of the sector in all_sectors.
points = plt.scatter(
    range(len(x_)),
    [enspace(name) for name in sector_names],
    s=350,
    cmap='plasma',
    marker='|',
    c=[all_sectors.index(name) for name in sector_names],
)
plt.vlines(ticks, -0.5, len(all_sectors) - 0.5)
# plt.scatter returns the point collection, not an Axes; reach the Axes
# through it to hide the x axis.
points.axes.get_xaxis().set_visible(False)
plt.savefig('SP1500ind_SSSNET.pdf', dpi = 300, bbox_inches='tight', pad_inches = 0.25)

### Summary
This is a sample notebook for analyzing results on the S\&P 1500 data set.