# Descriptive statistics

In [None]:
from bokeh.charts import Bar, output_file, output_notebook, show, reset_output, save
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource
from scipy import stats

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy as sc

## Data loading

In [None]:
# Load the cleaned returns table: ';'-separated CSV, first row used as the
# header, and the 'Date' column parsed into a datetime index.
# The columns are then sorted by label. `axis` is passed by keyword because
# pandas >= 2.0 made the arguments of `sort_index` keyword-only, so the
# former positional call `sort_index(1)` raises a TypeError there.
returns = pd.read_csv(filepath_or_buffer='../donnees/clean/RET_PX_LAST.csv',
                      sep=';',
                      header=0,
                      index_col='Date',
                      parse_dates=True).sort_index(axis=1)

What does a return look like?

In [None]:
# Column of `returns` used as the example. The title is built from the same
# value so the label always matches the series that is actually plotted
# (the title previously read 'ACAFP' while the 'ACFP' column was drawn).
example_ticker = 'ACFP'
p = figure(title='Returns of ' + example_ticker,
           x_axis_type="datetime")
p.line(returns.index.values, returns[example_ticker])
# Hide the grid lines on both axes.
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
# Start from a clean output state, then render inline in the notebook.
reset_output()
output_notebook()
show(p)
# Also write the figure to a standalone HTML file.
output_file('../results/statsDesc/returnsExample.html')
save(p)

## Quick statistics

In [None]:
# Summary statistics of the returns, as produced by DataFrame.describe();
# the rows 'mean', 'std', 'min' and 'max' are used by the cells below.
statsDesc = returns.describe()

In [None]:
# Keep only the four headline statistics and show one row per column of
# `statsDesc` by transposing the selection.
headline_stats = ['mean', 'std', 'min', 'max']
statsDesc.loc[headline_stats].transpose()

## Focus on a range of data
Extreme values

In [None]:
def barPlot(df, title):
    """Plot labelled values over a categorical x-axis and save the figure as HTML.

    Despite its name, the function draws a line glyph, not bars.

    Parameters
    ----------
    df : object exposing ``keys()`` and ``values``
        ``df.keys().values`` provides the x-axis categories and ``df.values``
        the y values. The calls in this notebook pass a sorted slice of a
        row of ``statsDesc``.
    title : str
        Figure title. It is also used as the output file name:
        ``'../results/statsDesc/' + title + '.html'``.

    Returns
    -------
    bool
        Always ``True``.
    """
    reset_output()
    # Compute the category labels once and reuse them for both the axis
    # range and the data source.
    labels = list(df.keys().values)
    p = figure(x_range=labels,
               title=title)
    source = ColumnDataSource({'labels': labels,
                               'values': df.values})
    p.line('labels', 'values', source=source)
    # Bokeh interprets a numeric label orientation as an angle in radians,
    # so a 45-degree tilt is pi/4 (the literal 45 was read as 45 radians).
    p.xaxis.major_label_orientation = np.pi / 4
    output_file('../results/statsDesc/' + title + '.html')
    save(p)
    return True

### Maximum return

In [None]:
# The 20 largest per-column maxima, in ascending order.
largest_maxima = statsDesc.loc['max'].sort_values().tail(20)
barPlot(largest_maxima, title='maximumEvolution')

### Minimum return

In [None]:
# The 20 smallest per-column minima, in ascending order.
smallest_minima = statsDesc.loc['min'].sort_values().head(20)
barPlot(smallest_minima, title='minEvolution')

### Intersection of the previous two ranges and the 20 highest standard deviations

In [None]:
# Columns that are simultaneously among the 20 largest maxima, the 20
# smallest minima and the 20 largest standard deviations.
among_largest_max = set(statsDesc.loc['max'].sort_values().tail(20).keys())
among_smallest_min = set(statsDesc.loc['min'].sort_values().head(20).keys())
among_largest_std = set(statsDesc.loc['std'].sort_values().tail(20).keys())
among_largest_max & among_smallest_min & among_largest_std

## Correlations between stocks

### Correlation computation

In [None]:
# Correlation matrix, rounded to two decimals and rendered as LaTeX, for the
# columns at positions 1-2 and 4-5 plus the 'VOWGY' column.
selected_columns = (returns.columns[1:3].tolist()
                    + returns.columns[4:6].tolist()
                    + ['VOWGY'])
correlation_table = returns[selected_columns].corr().round(decimals=2)
correlation_table.to_latex()

### Significance test

In [None]:
# Build `res`, a list of [stock1, stock2, p_value] entries with one entry per
# unordered pair of distinct columns of `returns`. The p-value is the second
# element returned by scipy.stats.pearsonr.
tickers = returns.columns.values
res = []
for position, stock1 in enumerate(tickers):
    # Only pair stock1 with the columns that come after it: every pair is
    # tested exactly once, and a stock is never tested against itself (the
    # previous version appended stock1 to `seen` only after the inner loop,
    # so each stock was also correlated with itself).
    for stock2 in tickers[position + 1:]:
        p_value = stats.pearsonr(returns[stock1], returns[stock2])[1]
        res.append([stock1, stock2, p_value])

In [None]:
# Order the entries of `res` by their third element (index 2), largest first.
sorted(
    res,
    key=lambda entry: entry[2],
    reverse=True,
)