## Initialize

In [None]:
%load_ext autoreload

%autoreload 2
%matplotlib inline

import datetime as dt
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as mplcm
import matplotlib.colors as colors
import numpy as np
import pytz
import pandas as pd
import seaborn as sns
import math
import scipy
import copy
import plotly.graph_objects as go

from analysis import get_ticker_history, get_etfs_history, History


# pytz time-zone object for 'Europe/Moscow'.
MOSCOW_TIMEZONE = pytz.timezone('Europe/Moscow')

# From rest
# client = openapi.sandbox_api_client(token)
# client.sandbox.sandbox_register_post()
# client.sandbox.sandbox_clear_post()
# client.sandbox.sandbox_currencies_balance_post(sandbox_set_currency_balance_request={"currency": "USD", "balance": 1000})


# From streaming
# candle_subs = [{'figi': 'BBG000B9XRY4', 'interval': '1min'}, {'figi': 'BBG009S39JX6', 'interval': '1min'}]
# orderbook_subs = [{'figi': 'BBG0013HGFT4', 'depth': 1}, {'figi': 'BBG009S39JX6', 'depth': 3}]
# instrument_info_subs = [{'figi': 'BBG000B9XRY4'}, {'figi': 'BBG009S39JX6'}]

# run_stream_consumer(token,
#                     candle_subs, orderbook_subs, instrument_info_subs,
#                     on_candle_event=print_event,
#                     on_orderbook_event=print_event,
#                     on_instrument_info_event=print_event)



In [None]:
# Instrument and candle interval to download.
ticker = 'FXGD'
interval = 'day'

# Query window: the 52 weeks that end now (timezone-aware, UTC).
end = dt.datetime.now(tz=dt.timezone.utc)
start = end - dt.timedelta(weeks=52)

# Fetch the history of the chosen ticker over that window.
ticker_history = get_ticker_history(
    ticker=ticker,
    start=start,
    end=end,
    interval=interval,
)

In [None]:
# Line plot of the 'c' column against time for the downloaded ticker,
# titled with the ticker symbol.
fig, ax = plt.subplots()
ax.plot(ticker_history['time'].values, ticker_history['c'])
ax.set_title(ticker)

In [None]:
# The helper returns two values; judging by the names they are the ETF
# history and the matching list of tickers — see analysis.get_etfs_history.
etfs_history, etf_tickers = get_etfs_history()

## Download and store history

In [None]:
# History object configured for the 'day' interval.
hist_daily = History(interval='day')
# display(hist_daily.data.time.dtype)
# NOTE(review): reload=0 presumably means "update without re-downloading
# everything" — confirm against analysis.History.update.
hist_daily.update(reload=0)
# Last expression of the cell: displays the history table.
hist_daily.data

In [None]:
# Largest (most recent) value in the 'time' column of the stored history.
hist_daily.data.time.max()

In [None]:
# Working aliases for the stored history table and its ticker list.
data, tickers = hist_daily.data, hist_daily.tickers

# One distinct colour per ticker: indices 0..M-1 are normalised and mapped
# onto the 'gist_rainbow' colormap.
M = len(tickers)
cm = plt.get_cmap('gist_rainbow')
cNorm = colors.Normalize(vmin=0, vmax=M - 1)
scalarMap = mplcm.ScalarMappable(cmap=cm, norm=cNorm)

def get_last_not_nan(lst):
    """Return the last element of ``lst`` that is not NaN.

    The sequence is scanned from its end towards its start.  When it is
    empty or contains only NaNs, ``np.nan`` is returned.
    """
    return next((el for el in lst[::-1] if not np.isnan(el)), np.nan)


# Draw two figures with one line per ticker:
#   1. the raw 'c' price,
#   2. the same price rescaled so that its last non-NaN value equals 100.
# The per-ticker row selector is called `ticker_mask` (not `filter`) so that
# the builtin `filter` is not overwritten for the rest of the notebook.
for ylabel, normalize in (('Price', False), ('Price, % (100%=now)', True)):
    plt.figure(figsize=(20, 10))
    ax = plt.gca()
    # Cycle through M evenly spaced colours so every ticker gets its own.
    ax.set_prop_cycle(color=[scalarMap.to_rgba(i) for i in range(M)])

    for ticker in tickers:
        ticker_mask = data.ticker == ticker
        t = data.loc[ticker_mask, 'time'].values
        y = data.loc[ticker_mask, 'c']
        if normalize:
            # 100 % corresponds to the most recent available price.
            y = y / get_last_not_nan(y) * 100
        plt.plot(t, y.values, label=ticker)

    plt.ylabel(ylabel)
    plt.xlabel('Time')
    plt.legend()

## Basic calculate_statistics

In [None]:
# Run the statistics step of the History object (implemented in the
# `analysis` module; its effects are not visible from this notebook).
hist_daily.calculate_statistics()

## Annualized gain
Observations:
1. I have learned here that during the day most stocks drop! The mean day change is negative! It's better on average to buy in the afternoon.
2. It is strange that even for FXMM my error is so large. That means there is room for improvement, perhaps by increasing the analysis interval or by producing a longer-term estimate.

In [None]:
# NOTE: `data` is the very object stored in hist_daily.data, so every column
# added below is written into the stored history table as well.
data = hist_daily.data

# Intraday move: close minus open, absolute and relative to the open.
data['day_change'] = data['c'] - data['o']
data['norm_day_change'] = data['day_change'] / data['o']

# Close-to-close change per ticker.  diff(-1) yields row - next_row, hence
# the sign flip: the stored value is next_row's close minus this row's close.
data['day_change_c2c'] = -data.groupby(by='ticker')['c'].diff(-1)

# Time step to the following row of the same ticker (row - next_row).
data['dt'] = data.groupby(by='ticker')['time'].diff(-1)

# Make sure it's 1-day intervals only: blank the change wherever the
# following row is not exactly one day later.
not_one_day = data['dt'] != dt.timedelta(days=-1)
data.loc[not_one_day, 'day_change_c2c'] = np.nan
data['rel_day_change_c2c'] = data['day_change_c2c'] / data['c']

# Independent copy in which every row belonging to RUSE or RUSB is set to NaN.
data_filtered = data.copy(deep=True)
excluded = data_filtered['ticker'].isin(['RUSE', 'RUSB'])
data_filtered[excluded] = np.nan

tickers = data.ticker.unique()
N = len(tickers)
data[['day_change_c2c', 'dt', 'rel_day_change_c2c']]

### Daily gains plot

In [None]:
# plotly.express is otherwise imported only in a later cell of this notebook;
# import it here so the cell also works when run top to bottom.
import plotly.express as px

column = 'rel_day_change_c2c'  # 'day_change'

# Per-ticker mean, variance, standard deviation and number of non-NaN
# observations of `column`, computed in a single groupby pass.
one_day_params = data.groupby(by='ticker')[column].agg(
    ['mean', 'var', 'std', 'count'])
# Standard error of the mean.
one_day_params['SE'] = one_day_params['std'] / np.sqrt(one_day_params['count'])

# Drop weird values
one_day_params.loc[one_day_params['mean'] > 0.1, 'mean'] = np.nan

one_day_params_sorted = one_day_params.sort_values(
    by='mean', ascending=False, axis=0)

# NOTE(review): with column == 'rel_day_change_c2c' the plotted mean is a
# fraction (change / close) and is not multiplied by 100, although the axis
# label says "%" — confirm which one is intended.
fig = go.Figure(
    go.Bar(x=one_day_params_sorted.index, y=one_day_params_sorted['mean']))
fig.update_layout(xaxis_title='Ticker', yaxis_title='Avg. daily gains, %',
                  title='Annualized gain estimate from close to close daily change')
fig.show()

# Same bars with the standard error of the mean as error bars.
fig = go.Figure(
    go.Bar(x=one_day_params_sorted.index,
           y=one_day_params_sorted['mean'],
           error_y=dict(type='data',
                        array=one_day_params_sorted['SE'].values)))
fig.update_layout(xaxis_title='Ticker', yaxis_title='Avg. daily gains, %',
                  title='Annualized gain estimate from close to close daily change ± SE')
fig.show()

# Per-ticker distribution of `column`, using the copy with RUSE/RUSB blanked.
fig2 = px.violin(data_filtered, y=column, x='ticker', points='all')
fig2.show()

In [None]:
# plotly.express is otherwise imported only in a later cell of this notebook;
# import it here so the cell also works when run top to bottom.
import plotly.express as px

ticker = 'FXIT'
# Distribution of `column` for this single ticker, with all points shown.
fig3 = px.violin(data_filtered[data_filtered['ticker'] == ticker], y=column, points='all')
fig3.show()

### Annualized gains. Assume normality and geometric Brownian motion

In [None]:
# Number of daily steps used to scale the one-day statistics to a year.
# NOTE(review): this is 364 calendar days, while the daily changes are only
# kept for rows exactly one day apart — confirm this is the intended scaling.
days = 52 * 7

# According to the solution of the geometric Brownian motion (and assuming it), one gets for the annualized gain
# (expressed in percent of the starting value, i.e. 100 means "unchanged"):
yearly_gain_percent = np.exp(one_day_params['mean'] * days) * 100
# ... and for its standard deviation, in the same units:
yearly_std_percent = np.sqrt(
    np.exp(2 * one_day_params['mean'] * days)
    * (np.exp(one_day_params['var'] * days) - 1)) * 100

# Drop weird values
yearly_gain_percent[yearly_gain_percent > 10**3] = np.nan
# Named `yearly_gain_sorted` rather than `sorted` so the builtin stays usable.
yearly_gain_sorted = yearly_gain_percent.sort_values(ascending=False)

fig = go.Figure(
    go.Bar(x=yearly_gain_sorted.index, y=yearly_gain_sorted))
fig.update_layout(xaxis_title='Ticker', yaxis_title='Annualized gain, %',
                  title='Annualized gain estimate from close to close daily change')
fig.show()

print(one_day_params)
print(yearly_gain_sorted)
# The original printed an undefined name here; the spread computed above is
# `yearly_std_percent`.
print(yearly_std_percent)
# annualized_gains_normal

## Correlations

### Day change 

In [None]:
# `import scipy` alone does not guarantee that the `stats` submodule is
# loaded, so import it explicitly before using scipy.stats.skew.
import scipy.stats

bins = 200
figsize = np.array([1, 2]) * 14
quant = 0.98
column = 'rel_day_change_c2c'  # 'day_change_c2c'  # 'day_change'

# One subplot per ticker, arranged in two columns.
cols = 2
rows = math.ceil(len(tickers) / cols)
fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=figsize,
                         gridspec_kw={'hspace': 0.3})
axes = np.array(axes).flat

skews = []
for ticker, ax in zip(tickers, axes):
    dat1 = data.loc[data.ticker == ticker, column]

    # Only the values between the (1 - quant) and quant quantiles are drawn,
    # so that outliers do not stretch the histogram.
    plot_range = np.nanquantile(dat1.values, [1 - quant, quant])
    dat2 = dat1[(dat1 >= plot_range[0]) & (dat1 <= plot_range[1])]
    if len(dat2) > 0:
        # NOTE(review): sns.distplot is deprecated in recent seaborn releases.
        sns.distplot(dat2.values, bins=bins, ax=ax)

    # Skewness of all non-NaN values (outliers included).  NaNs are dropped
    # up front and the result is converted to a plain float, so the code does
    # not depend on whether scipy returns a masked array or a NumPy scalar.
    skew = float(scipy.stats.skew(dat1.dropna().values))
    skews.append(skew)
    ax.set_title(f'{ticker}, len = {len(dat1)}, skew = {skew:.2f}')

fig.suptitle(column);

# Distribution of the per-ticker skewness values.
plt.figure()
sns.distplot(skews)
print(skews)

### Day change normalized to opening

In [None]:
# fig, axes = plt.subplots(nrows =rows, ncols = cols, figsize = figsize, gridspec_kw = {'hspace': 0.3})
# axes = np.array(axes).flat

# skews = []
# for ticker, ax in zip(tickers, axes):
#     dat1=data.loc[data.ticker ==ticker, 'norm_day_change']
# #     print('Data points: ', len(dat1))
#     plot_range = np.quantile(dat1.values, [1-quant, quant])

#     dat2 = dat1[(dat1 >= plot_range[0]) & (dat1 <= plot_range[1])]
    
#     sns.distplot(dat2.values, bins = bins, ax = ax)
    
#     skew = scipy.stats.skew(dat1)
#     skews.append(skew)
#     ax.set_title(f'{ticker}, len = {len(dat1)}, skew = {skew:.2f}')


# #     plot_range = np.quantile(dat1.values, [1-quant, quant])
# #     ax.set_xlim(plot_range)

# fig.suptitle('Normalized day changes');

# plt.figure()
# plt.hist(skews)
# print(skews)

The gain from normalization is not evident, so this approach is abandoned.

### Correlation matrix

In [None]:
# Pearson correlation of `column` between every pair of tickers.
# The result keeps its historical name `cov_matrix`, but it holds
# correlations (covariance divided by both standard deviations).
N = len(tickers)

# Wide table: one row per timestamp, one column per ticker, with the columns
# in the same order as `tickers` so that row/column i belongs to tickers[i].
wide = (
    data.loc[:, ['time', 'ticker', column]]
    # pivot() needs unique (time, ticker) pairs; keep the first if repeated.
    .drop_duplicates(subset=['time', 'ticker'])
    .pivot(index='time', columns='ticker', values=column)
    .reindex(columns=tickers)
)

# DataFrame.corr() evaluates each pair on the timestamps where both tickers
# have a value, for the covariance and for both variances alike.  The
# previous nested loop took each variance over a series' own non-NaN rows,
# which is not a true correlation, and indexed .loc with a set, which newer
# pandas versions reject.
cov_matrix = wide.corr().values

cov_matrix

In [None]:
# Cluster the correlation matrix
import scipy.cluster.hierarchy as sch

cov_df = pd.DataFrame(cov_matrix, columns = tickers, index = tickers)
X = cov_df.values
d = sch.distance.pdist(X)   # vector of pairwise distances
# print('d', d)
L = sch.linkage(d, method='complete')
# print('L', L)
ind = sch.fcluster(L, 0.5*d.max(), 'distance')
# print(ind)

columns = [cov_df.columns.tolist()[i] for i in list((np.argsort(ind)))]
# print(columns)
clustered_cov_df = cov_df.loc[columns, columns]

In [None]:
import plotly.express as px

# Heatmap of the cluster-ordered correlation matrix, with ticker names as
# tick labels on both axes.
fig = px.imshow(
    clustered_cov_df,
    x=clustered_cov_df.columns,
    y=clustered_cov_df.index,
)
fig.show()

### Individual correlation profile

In [None]:
ticker = 'FXIT'


def plot_one_correlation(ticker):
    """Show a bar chart of one ticker's correlations with every ticker.

    The row of ``clustered_cov_df`` at the position of ``ticker`` among its
    columns is taken and plotted with its values in descending order.

    Raises:
        IndexError: if ``ticker`` is not among the columns of
            ``clustered_cov_df``.
    """
    is_match = np.array(clustered_cov_df.columns == ticker)
    # Position of the first matching column; rows use the same ordering.
    row_pos = np.argwhere(is_match)[0, 0]
    ordered = clustered_cov_df.iloc[row_pos, :].sort_values(ascending=False)

    bars = go.Bar(x=ordered.index, y=ordered)
    fig = go.Figure(bars)
    fig.update_layout(
        title_text=ticker + ' correlations',
        xaxis_title='Ticker',
        yaxis_title='Correlation',
    )
    fig.show()


plot_one_correlation(ticker)

In [None]:
# Correlation profile of SBMX against all tickers.
plot_one_correlation('SBMX')

In [None]:
# Correlation profile of FXMM against all tickers.
plot_one_correlation('FXMM')

In [None]:
# Correlation profile of FXTB against all tickers.
plot_one_correlation('FXTB')