# Exploring Financial Time Series Data



In [426]:
import numpy as np
import functools
import pandas as pd
import datetime
import plotly.offline as py
import plotly.graph_objs as go
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from itertools import islice
import sklearn 
import sklearn.decomposition

# Enable inline Plotly rendering for this notebook session.
init_notebook_mode()


# NOTE: this rebinds the name `plot` that was imported from plotly.offline above.
def plot(d, lbls):
    """Draw an inline Plotly chart with one scatter trace per label.

    Parameters
    ----------
    d : indexable with an ``index`` attribute (used as a pandas DataFrame here)
        ``d.index`` supplies the x values and ``d[label]`` the y values.
    lbls : iterable
        Labels to plot; each one is also used as the trace name.

    Returns
    -------
    Whatever ``py.iplot`` returns.
    """
    traces = [go.Scatter(x=d.index, y=d[label], name=label) for label in lbls]
    return py.iplot(traces)

#tuple monad
def unit_t(x):
    """Return ``x`` unchanged if it is a list or tuple, else wrap it as ``(x,)``."""
    if isinstance(x, (list, tuple)):
        return x
    return (x,)


def bind_t(f, m):
    """Call ``f`` with the items of ``m`` unpacked as positional arguments."""
    return f(*m)

#single value monad
#unit_s = lambda x

def iterate(f, *x, unit=unit_t, bind=bind_t):
    """Endlessly yield the results of applying ``f`` to its own output.

    The first value yielded is ``bind(f, x)`` for the initial positional
    arguments ``x``. Every yielded value is then passed through ``unit`` and
    fed back into ``bind(f, ...)`` to produce the next one.

    Parameters
    ----------
    f : callable
        Function to apply repeatedly.
    *x
        Initial arguments for the first application.
    unit : callable, keyword-only
        Converts a result into the argument container expected by ``bind``.
    bind : callable, keyword-only
        Applies ``f`` to an argument container.

    Yields
    ------
    The successive results of ``f``; the generator never terminates on its own.
    """
    args = x
    while True:
        result = bind(f, args)
        yield result
        args = unit(result)
        
def nth(n, it):
    """Return the last of the first ``n`` items produced by ``it``.

    For an iterable with at least ``n`` items this is the n-th item (1-based).
    The iterable is consumed up to that point. ``islice`` objects cannot be
    subscripted, so the items are walked instead of indexed with ``[-1]``.

    Parameters
    ----------
    n : int
        Maximum number of items to take from ``it``.
    it : iterable
        Source of items; may be an endless generator.

    Raises
    ------
    IndexError
        If no item is available (``n`` is 0 or ``it`` is empty).
    """
    missing = object()
    last = missing
    for last in islice(it, n):
        pass
    if last is missing:
        raise IndexError("nth: no item available")
    return last

In [575]:
# Load the time series: semicolon-separated file with decimal commas; the
# first column becomes the index and is parsed as dates.
df = pd.read_csv(
    'tss3.csv',
    sep=';',
    index_col=0,
    parse_dates=True,
    decimal=',',
)

## Data
We use the daily adjusted closing prices of 441 S&P 500 stocks on 2458 days from 2011-11-01 to 2016-11-01. We retrieved the data from Quandl and cleaned it so that only those stocks and days remain for which complete data is available.

In [576]:
def pre_diff(df):
    """Split a frame into row-to-row differences and its first row.

    The differences are built as a new frame rather than written into
    ``copy.values``: that write is lost when ``.values`` is a copy (mixed
    dtypes) and fails when it is read-only (pandas copy-on-write). The first
    row is removed by position, so duplicated index labels do not cause
    additional rows to be dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose rows are consecutive observations. It is not modified.

    Returns
    -------
    tuple
        ``(diffs, base)``. ``diffs`` has the columns of ``df`` and the index
        ``df.index[1:]``, each row holding ``row[i] - row[i-1]``. ``base`` is
        the first row of ``df`` as a one-row frame.
    """
    diffs = pd.DataFrame(
        np.diff(df.values, axis=0),
        index=df.index[1:],
        columns=df.columns,
    )
    return (diffs, df.iloc[0:1])

def pre_mean(df):
    """Subtract each column's mean from that column.

    The means are computed once with ``DataFrame.mean`` and reused for the
    subtraction, instead of being evaluated twice per column through
    ``apply``.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table. It is not modified.

    Returns
    -------
    tuple
        ``(centered, means)``: the centred frame and a Series of the column
        means indexed by column label.
    """
    means = df.mean()
    return (df - means, means)

def pre_normalize(df):
    """Scale a frame by its largest absolute entry.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table. It is not modified.

    Returns
    -------
    tuple
        ``(scaled, fac)`` where ``fac`` is the largest absolute value in
        ``df`` and ``scaled`` is ``df / fac``. If ``fac`` is zero (every entry
        is zero) an unscaled copy is returned, because dividing would turn the
        whole frame into NaN.
    """
    # Reduce columns first, then across columns, so the result does not depend
    # on how a DataFrame reduction treats axis=None.
    fac = df.abs().max().max()
    if fac == 0:
        return (df.copy(), fac)
    return (df / fac, fac)

def pre_process(df):
    """Run the three pre-processing steps in order: difference, centre, scale.

    Returns
    -------
    tuple
        ``(d, base, means, fac)``: the processed frame followed by the extra
        value each step returned (``base`` from ``pre_diff``, ``means`` from
        ``pre_mean``, ``fac`` from ``pre_normalize``).
    """
    diffed, base = pre_diff(df)
    centered, means = pre_mean(diffed)
    scaled, fac = pre_normalize(centered)
    return (scaled, base, means, fac)

In [677]:
# Pre-process the full table: pre_df is the differenced, centred and scaled
# frame; base, means and fac are the values pre_process returns alongside it.
pre_df, base,means,fac = pre_process(df)

In [584]:
# Rank the columns by their largest pre-processed value (ascending) and keep
# the labels of the last eight, i.e. the eight largest.
# The labels are read from pre_df: `d` is not assigned until the ICA cell
# further down, so using it here fails on a freshly started kernel.
idcs = np.argsort(pre_df.max().values)
lbls = [pre_df.columns[i] for i in idcs[-8:]]
lbls

['FSLR', 'AIG', 'ISRG', 'CMG', 'GOOGL', 'PCLN', 'HAS', 'ICE']

## Differential, Normalized Returns
Below we compare the differenced, mean-centered, and normalized series with the original adjusted closing prices.

In [425]:
# Pre-processed series first, then the raw prices, for the same labels.
# `d` is only assigned in the ICA cell further down, so the pre-processed
# frame computed earlier (pre_df) is plotted here.
plot(pre_df,lbls)
plot(df,lbls)

## ICA
In this section, we apply independent component analysis (ICA) to our data.

In [713]:
# load ICA and fit our data
from sklearn.decomposition import FastICA
# random_state is fixed so that re-running the notebook gives the same
# decomposition; without it each fit may differ.
ica = FastICA(whiten=False, max_iter=1000, random_state=0)

# reduce data size: keep every second row of the price table before
# pre-processing.
# NOTE: this rebinds base, means and fac from the earlier pre_process(df)
# call; from here on they belong to the subsampled data in `d`.
d, base, means, fac = pre_process(df[::2])
ica_d = ica.fit(d)

In [708]:
# Compute the sources S from the fitted ICA model, then map them back through
# the inverse transform to obtain the round-trip signal x.
# NOTE(review): presumably S is (rows of d) x (components) - confirm against
# the FastICA documentation.
S = ica_d.transform(d)
x = ica_d.inverse_transform(S)

In [709]:
def reconstruct(df, x, base, means, fac):
    """Undo the pre-processing steps and rebuild the level series.

    The steps are reversed in order: multiply by ``fac``, add ``means``,
    cumulatively sum down the rows, and add the first row ``base``. ``base``
    is then placed in front of the rebuilt rows.

    The result is assembled with ``pd.DataFrame`` and ``pd.concat``.
    ``DataFrame.append`` is no longer available in pandas 2.x, and writing
    into ``frame.values`` is not guaranteed to update the frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Supplies the index and columns for the rebuilt rows; not modified.
    x : array-like
        Pre-processed values, one row per row of ``df``.
    base : pandas.DataFrame
        One-row frame holding the starting values.
    means : pandas.Series or array-like
        Per-column offsets, in the column order of ``df``.
    fac : scalar
        Scale factor that was divided out during pre-processing.

    Returns
    -------
    pandas.DataFrame
        ``base`` followed by the rebuilt rows.
    """
    increments = np.asarray(x) * fac + np.asarray(means)
    levels = np.cumsum(increments, axis=0) + np.asarray(base)
    rebuilt = pd.DataFrame(levels, index=df.index, columns=df.columns)
    return pd.concat([base, rebuilt])

In [712]:
# Rebuild the series from the ICA round-trip signal x, using the base row,
# means and factor returned for the subsampled data, then plot the same
# labels as before.
rec_df = reconstruct(d,x,base,means,fac)

plot(rec_df,lbls)

This shows that we can perform an ICA decomposition and successfully transform the result back into the original series.

In [490]:
def topsources(S, n):
    """Return the indices of the ``n`` columns of ``S`` with the largest absolute peak.

    The peak of a column is the maximum of its absolute values (reduction
    along axis 0). Indices are ordered by ascending peak, so the column with
    the largest peak comes last.
    """
    peaks = np.abs(S).max(axis=0)
    order = np.argsort(peaks)
    return order[-n:]
# Indices of the eight columns of S with the largest absolute peak.
topsources(S,8)

array([410, 152, 386, 158, 183, 354,  14, 202])

In [491]:
#show top
# topsources reduces along axis 0 and therefore returns column indices of S,
# so each trace has to be a column, S[:, i]; S[i] would plot a row instead.
py.iplot([go.Scatter(y=S[:, i], name=str(i)) for i in topsources(S,8)])

In [484]:
# Column indices of S ordered by each column's (signed) maximum, ascending.
np.argsort(np.max(S, axis=0))

array([354,  14, 314, 257, 227, 122, 316,  64, 234, 298, 167, 261, 307,
       293, 399, 151, 336, 436, 353, 131, 364,   2, 211, 109, 343, 144,
        34,  85,  81, 290, 240, 352, 394, 169, 415, 347, 411, 440, 337,
       258, 135,   1, 393, 365, 186, 378, 114,  98, 113, 263, 196, 308,
       198, 332,  28, 388, 315, 162,  93,  17, 136,  47,  78, 344, 181,
       310,  35, 164, 271, 387,  49, 161, 236, 346, 155, 260, 304, 292,
       158,  46,  18, 133, 323, 341, 157, 107, 241, 141, 395, 432, 324,
       383,  45, 418, 312,  76,  55, 207, 342,  27, 239, 423,   5, 372,
       246, 160, 278, 249, 380, 357, 385, 194, 129, 424, 402, 428, 140,
       193, 437, 280, 218,  74, 369, 214,  83, 259, 412, 281, 105, 371,
        92,  24, 178, 374, 287, 153, 201, 191, 254, 209,  44, 104, 250,
       272, 244, 302, 205, 226, 138,  30, 128,   9, 318, 300, 309, 130,
        86, 384,  41,  95, 230,  22, 126, 338, 275, 143,  59, 243, 274,
       363, 123, 232, 148, 417,  48, 409, 184, 118, 377, 361,  1