In [None]:
import dautil as dl
from scipy import stats
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.html import widgets
from IPython.display import display
from IPython.display import HTML

In [None]:
# Render the formulas used in this notebook via dautil's LatexRenderer.
# NOTE(review): chapter=3/start=17 presumably control equation numbering;
# confirm against dautil.nb.LatexRenderer.
lr = dl.nb.LatexRenderer(chapter=3, start=17)
# Spearman rank correlation expressed through the rank differences d_i.
lr.render(r'\rho = {1- \frac {6 \sum d_i^2}{n(n^2 - 1)}}')
# Definition of the per-observation difference d_i.
lr.render(r'd_i = x_i - y_i')
# Standard-error expression for a sample of size n.
lr.render(r'\sigma = \frac{ 0.6325 }{ \sqrt{n-1} }')
# z-score built from F(r); get_ci() in this notebook uses arctanh for F.
lr.render(r'z = \sqrt{\frac{n-3}{1.06}}F(r)')

In [None]:
def get_ci(n, corr, confidence=0.95):
    """Approximate confidence interval for a Spearman rank correlation.

    The interval is built on the Fisher-transformed scale: ``arctanh(corr)``
    is given the standard error ``sqrt(1.06 / (n - 3))`` (the reciprocal of
    the scaling in the z-score ``sqrt((n - 3) / 1.06) * arctanh(corr)``),
    a symmetric normal-quantile margin is added on that scale, and the
    bounds are mapped back to correlation units with ``tanh``.

    Parameters
    ----------
    n : int
        Number of paired observations; must be greater than 3.
    corr : float
        Sample Spearman correlation coefficient.
    confidence : float, optional
        Two-sided confidence level, strictly between 0 and 1
        (default 0.95).

    Returns
    -------
    numpy.ndarray
        Two-element array ``[lower, upper]``.

    Raises
    ------
    ValueError
        If ``n <= 3`` or ``confidence`` is not strictly between 0 and 1.
    """
    if n <= 3:
        raise ValueError('n must be greater than 3, got {0}'.format(n))
    if not 0 < confidence < 1:
        raise ValueError(
            'confidence must be in (0, 1), got {0}'.format(confidence))

    # Work on the Fisher scale: the margin must be added to arctanh(corr)
    # itself, not to the z-score, otherwise the back-transformed interval
    # does not bracket the estimate.
    fisher_z = np.arctanh(corr)
    se = math.sqrt(1.06 / (n - 3))
    margin = stats.norm.ppf((1 + confidence) / 2) * se

    return np.tanh(fisher_z + np.array([-1, 1]) * margin)

In [None]:
# Load the weather data, drop rows with missing values, then average the
# result of dl.ts.groupby_yday() over each group.
df = dl.ts.groupby_yday(dl.data.Weather.load().dropna()).mean()

# Two dropdowns let the reader choose which pair of variables to correlate.
drop1 = widgets.Dropdown(
    options=dl.data.Weather.get_headers(),
    selected_label='TEMP',
    description='Variable 1',
)
drop2 = widgets.Dropdown(
    options=dl.data.Weather.get_headers(),
    selected_label='WIND_SPEED',
    description='Variable 2',
)

# Show both selectors, first variable on top.
for dropdown in (drop1, drop2):
    display(dropdown)

In [None]:
# Raw value arrays for the two columns currently selected in the dropdowns.
var1, var2 = (df[dropdown.value].values for dropdown in (drop1, drop2))
stats_corr = stats.spearmanr(var1, var2)

# Start the HTML report with a heading naming both selected variables.
dl.options.set_pd_options()
html_builder = dl.report.HTMLBuilder()
heading = 'Spearman Correlation between {0} and {1}'.format(
    *(dl.data.Weather.get_header(dropdown.value)
      for dropdown in (drop1, drop2)))
html_builder.h1(heading)

# One-row table holding the first two fields of the spearmanr() result.
html_builder.h2('scipy.stats.spearmanr()')
dfb = dl.report.DFBuilder(['Correlation', 'p-value'])
dfb.row([stats_corr[idx] for idx in (0, 1)])
html_builder.add_df(dfb.build())

In [None]:
# Sample size: the number of rows in df.
n = len(df.index)
# stats_corr holds (correlation, p-value); only the correlation coefficient
# may be fed into the interval computation. Passing the whole result would
# make the upper bound depend on the p-value.
ci = get_ci(n, stats_corr[0])
html_builder.h2('Confidence interval')
dfb = dl.report.DFBuilder(['2.5 percentile', '97.5 percentile'])
dfb.row(ci)
html_builder.add_df(dfb.build())

# Pairwise Spearman correlations between all columns of df.
corr = df.corr(method='spearman')

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# The title is set through the pyplot interface before the heatmap is drawn;
# presumably both act on the same current axes — keep this order.
plt.title('Spearman Correlation Matrix')
# Heatmap of the Spearman correlation matrix computed earlier as `corr`.
sns.heatmap(corr)
# Last expression of the cell: shows the HTML accumulated in html_builder.
HTML(html_builder.html)