<a href="https://colab.research.google.com/github/KilovoIt/Jupyter-Notebooks/blob/main/Cheatsheet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##### *'If you cannot replicate it, you don't understand it'*

In [None]:
from IPython.display import Javascript
def resize_colab_cell(*_event_args):
  """Ask the Colab front end to let the cell's output frame grow (maxHeight 5000).

  Registered below as an IPython ``pre_run_cell`` callback. IPython documents
  that such callbacks receive an ``info`` argument; any positional arguments
  are accepted and ignored so the callback works with or without it.
  """
  # Local import so the function does not depend on `display` being injected
  # into the interactive namespace.
  from IPython.display import display
  display(Javascript('google.colab.output.setIframeHeight(0, true, {maxHeight: 5000})'))


# NOTE: every execution of this cell defines a new function object and
# registers it again, so re-running the cell adds another callback.
get_ipython().events.register('pre_run_cell', resize_colab_cell)

# <font color='Teal'>**Central Limit Theorem:**</font>

### With a sufficiently large sample size, the distribution of the sample means is approximately normal.

######  <font color='DarkBlue'>*We start with a population that follows some distribution. We take a simple random sample of size **n** from the population and repeat this procedure 1,000 times. A larger number of repetitions could be used, but it would slow the code down. Will the distribution of the sample means look the same in all cases? Which factors affect the shape of that distribution?* </font>

In [None]:

#@title

from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual, IntSlider
import ipywidgets as widgets
from IPython.display import display


import random
import seaborn as sns
import math
import scipy.stats as stats
from scipy.stats import expon
import numpy as np
import matplotlib.pyplot as plt

# This function prepares the values that stand in for one population shape.
def DISTR(shape):
    """Return the population values used for the requested ``shape``.

    Parameters
    ----------
    shape : str
        One of 'Bimodal', 'Uniform', 'Exponential' or 'Normal'.

    Returns
    -------
    numpy.ndarray or list
        * 'Bimodal'     -- 10000 values of the standard normal pdf evaluated
          on an even grid over [-3, 3] (pdf values, not random draws).
        * 'Uniform'     -- a list holding every integer 0..999 fifty times.
        * 'Exponential' -- 1000 values of the exponential pdf evaluated on an
          even grid between its 1% and 99% quantiles (pdf values, not draws).
        * 'Normal'      -- 10000 random draws from ``np.random.normal(100, 5)``.

    Raises
    ------
    ValueError
        If ``shape`` is not one of the names above. Previously this fell
        through to the ``return`` with ``y`` unbound.
    """
    if shape == 'Bimodal':
        mu = 0
        variance = 1
        sigma = math.sqrt(variance)

        x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 10000)
        y = stats.norm.pdf(x, mu, sigma)

    elif shape == 'Uniform':
        # Same content and order as appending each i in range(1000) 50 times;
        # converted back to a list so the return type is unchanged.
        y = np.repeat(np.arange(1000), 50).tolist()

    elif shape == 'Exponential':
        y = expon.pdf(np.linspace(expon.ppf(0.01),
                                  expon.ppf(0.99), 1000))

    elif shape == 'Normal':
        y = np.random.normal(100, 5, 10000)

    else:
        raise ValueError(
            "unknown shape {!r}; expected 'Bimodal', 'Uniform', "
            "'Exponential' or 'Normal'".format(shape)
        )

    return y

# Build every population once. CLT() picks them by position, so the order
# Normal, Bimodal, Exponential, Uniform must be kept.
dist = [
    DISTR(distribution)
    for distribution in ['Normal', 'Bimodal', 'Exponential', 'Uniform']
]


    

def CLT(n=100, dist=dist, shape='Normal'):
    """Plot a population next to the distribution of its sample means.

    Draws 1000 samples of size ``n`` (without replacement) from the chosen
    population, then shows a histogram of the population on the left and a
    histogram of the 1000 sample means on the right.

    Parameters
    ----------
    n : int
        Sample size; must not exceed the population size.
    dist : sequence
        Populations indexed as 0=Normal, 1=Bimodal, 2=Exponential, 3=Uniform.
    shape : str
        Population name. Any name other than 'Normal', 'Bimodal' or
        'Exponential' selects index 3.
    """
    # Function-scope import: a later cell runs `from numpy import random`,
    # which rebinds the module-level name `random` to numpy.random, whose
    # `sample` is not the stdlib sampler. Importing here pins the stdlib one.
    import random as py_random

    position = {'Normal': 0, 'Bimodal': 1, 'Exponential': 2}
    y = dist[position.get(shape, 3)]

    # Convert once instead of on each of the 1000 iterations.
    population = list(y)
    sample_means = [
        np.mean(py_random.sample(population, n)) for _ in range(1000)
    ]

    plt.figure(figsize=(20, 10), dpi=300)

    ax0 = plt.subplot(121)
    ax0.hist(y, color='PaleGreen', bins=40, edgecolor='black', lw=1.2)
    ax0.set_title('Population distribution')
    ax0.spines['right'].set_visible(False)
    ax0.spines['top'].set_visible(False)

    ax1 = plt.subplot(122)
    ax1.hist(sample_means, edgecolor='black', color='LightSkyBlue', bins=40)
    ax1.spines['right'].set_visible(False)
    ax1.spines['top'].set_visible(False)
    ax1.set_title(f'Distribution of the sample means, sample size of {n}')

    plt.suptitle("Central Limit Theorem")
    plt.show()



# Controls: sample size n and the population to sample from.
samp_size = widgets.IntSlider(
    value=1,
    min=1,
    max=100,
    step=1,
    description='sample size',
    continuous_update=False,
)
shape_pop = widgets.Dropdown(
    options=['Bimodal', 'Normal', 'Uniform', 'Exponential'],
    description='population:',
)
ui = widgets.HBox([samp_size, shape_pop])

# Re-run CLT whenever one of the controls changes; the plot goes to `out`.
out = widgets.interactive_output(CLT, dict(n=samp_size, shape=shape_pop))
display(ui, out)


<IPython.core.display.Javascript object>

HBox(children=(IntSlider(value=1, continuous_update=False, description='sample size', min=1), Dropdown(descrip…

Output()

I took 1000 random samples of size **n** from the original population. At what **n** did the distribution of the sample means start to look roughly bell-shaped? What happened to the standard deviation of that distribution as **n** increased? Did the distribution become narrower? 

In [None]:
#@title

## Basic T-Distribution
import scipy.stats as stats
import matplotlib.pyplot as plt
import numpy as np
import scipy.integrate as integrate
import pandas as pd

### Widgets: significance level, number of tails, test direction,
### critical t and observed t.
alpha_level = widgets.FloatSlider(
    min=0.01,
    max=0.99,
    step=0.01,
    description=u'\u03B1:',
    continuous_update=False,
)
n_of_tails = widgets.Dropdown(
    options=[('one-tailed', 1), ('two-tailed', 2)],
    description='tails:',
)
direction = widgets.Dropdown(
    options=[
        (u'\u00B5\u2264x\u0304', 'popmean<=sampmean'),
        (u'\u00B5\u2265x\u0304', 'popmean>=sampmean'),
    ],
)
tcrit_level = widgets.FloatSlider(
    value=-2.54,
    min=-2.54,
    max=2.54,
    step=0.01,
    description='t-crit:',
    continuous_update=False,
)
t_observed = widgets.FloatSlider(
    value=-1.2,
    min=-2.54,
    max=2.54,
    step=0.01,
    description='t-observed:',
    continuous_update=False,
)


### Describing Handlers
def tcrit_handler(*args):
    """Recompute the t-crit slider from the current alpha, tails and direction.

    One-tailed with 'popmean<=sampmean' keeps the (rounded) lower quantile
    as is; every other case stores its negation. Two-tailed splits alpha
    between the tails before taking the quantile. Uses 19 degrees of freedom.
    """
    dof = 19
    two_tailed = n_of_tails.value != 1
    tail_area = alpha_level.value / 2 if two_tailed else alpha_level.value / 1
    quantile = round(stats.t.ppf(tail_area, dof), 2)

    keep_sign = (not two_tailed) and direction.value == 'popmean<=sampmean'
    tcrit_level.value = quantile if keep_sign else -quantile


def alpha_handler(*args):
    """Recompute the alpha slider from the current t-crit slider value.

    Integrates the t pdf (19 degrees of freedom) from a lower bound up to 5:
    the bound is ``-tcrit`` for a one-tailed 'popmean<=sampmean' test and
    ``tcrit`` otherwise. The area is doubled for a two-tailed test and the
    result, rounded to two decimals, is written to ``alpha_level``.
    """
    dof = 19
    density = lambda x: stats.t.pdf(x, dof)

    one_tailed = n_of_tails.value == 1
    if one_tailed and direction.value == 'popmean<=sampmean':
        lower = -tcrit_level.value
    else:
        lower = tcrit_level.value

    area, _abs_error = integrate.quad(density, lower, 5)
    if not one_tailed:
        area = area * 2

    alpha_level.value = round(area, 2)


def t_handler(*args):
    """Adapt the t-observed slider to the selected number of tails.

    Two-tailed: the slider is limited to [0, 3] and reset to 0.35.
    One-tailed: the slider's initial range [-2.54, 2.54] is restored.
    Previously the range stayed at [0, 3] after switching back, so a
    negative observed t could no longer be selected.
    """
    if n_of_tails.value == 2:
        t_observed.max = 3
        t_observed.min = 0
        t_observed.value = 0.35
    else:
        # Lower the minimum first so that min <= max holds at every step.
        t_observed.min = -2.54
        t_observed.max = 2.54

### Watch Functions
# alpha and direction drive t-crit; t-crit drives alpha; the number of tails
# only adjusts the t-observed slider.
alpha_level.observe(tcrit_handler, names='value')
tcrit_level.observe(alpha_handler, names='value')
direction.observe(tcrit_handler, names='value')
n_of_tails.observe(t_handler, names='value')

### Describing the main function
def distrib(alpha, t_observed, n=1, tcrit_direction='popmean<=sampmean'):
    """Plot the t pdf (19 dof) with the rejection region and p-value shaded.

    Parameters
    ----------
    alpha : float
        Significance level.
    t_observed : float
        Observed t statistic.
    n : int, optional
        Number of tails. Shading is drawn for 1 or 2; for the decision and
        the p-value, anything other than 1 is treated as two-tailed.
    tcrit_direction : str, optional
        'popmean<=sampmean' selects the left tail, any other value the right
        tail. Only used when ``n == 1``.

    The figure also shows the verdict on the null hypothesis, alpha and the
    p-value rounded to two decimals.
    """
    ## Setup
    dof = 19        # Degrees of freedom
    ntails = n      # Number of tails
    left_tailed = tcrit_direction == 'popmean<=sampmean'

    ## Critical t-score: the lower quantile; -tcrit is its mirror image.
    tcrit = stats.t.ppf(alpha / ntails, dof)
    # Two-tailed logic works on |t| so that a negative statistic is handled
    # symmetrically instead of shading almost the whole curve.
    t_abs = abs(t_observed)

    xs = np.linspace(-5, 5, 1000)
    density = stats.t.pdf(xs, dof)  # evaluated once, reused by every fill

    plt.figure(figsize=(15, 8), dpi=200)
    ax = plt.subplot(111)
    # No 'k' format string: combined with color= it only triggers a
    # "color is redundantly defined" warning and is overridden anyway.
    ax.plot(xs, density, label="T-Distribution PDF", color='#0047ab')
    ax.fill_between(xs, density, color='#99D7EE', alpha=1)
    ax.set_ylim([0, 0.5])
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_title('T-distribution')
    ax.set_xlabel('t')
    ax.set_ylabel('Density')

    ### Plotting the lines and shaded regions
    if n == 2:
        plt.vlines([-tcrit, tcrit], 0.0, stats.t.pdf(tcrit, dof), colors='r')
        plt.vlines([-t_abs, t_abs], 0.0, stats.t.pdf(t_abs, dof), colors='#009AD5')
        ax.fill_between(xs, density, where=xs < tcrit, color='#AA1839', alpha=.8, label=u'\u03B1')
        ax.fill_between(xs, density, where=xs > -tcrit, color='#AA1839', alpha=.8)
        ax.fill_between(xs, density, where=xs > t_abs, color='#0047AB', alpha=0.5, label='p-value')
        ax.fill_between(xs, density, where=xs < -t_abs, color='#0047AB', alpha=0.5)

    elif n == 1:
        plt.vlines(t_observed, 0.0, stats.t.pdf(t_observed, dof), colors='#009AD5')

        if left_tailed:
            ax.fill_between(xs, density, where=xs < t_observed, color='#0047AB', alpha=0.8, label='p-value')
            ax.fill_between(xs, density, where=xs < tcrit, color='#AA1839', alpha=0.8, label=u'\u03B1')
            plt.vlines(tcrit, 0.0, stats.t.pdf(tcrit, dof), colors='r')
        else:
            ax.fill_between(xs, density, where=xs > -tcrit, color='#AA1839', alpha=0.8, label=u'\u03B1')
            ax.fill_between(xs, density, where=xs > t_observed, color='#0047AB', alpha=0.8, label='p-value')
            plt.vlines(-tcrit, 0.0, stats.t.pdf(tcrit, dof), colors='r')
    plt.legend()

    ### Making the decision about rejecting the null hypothesis
    text_responses = [r'$\mathit{H}_0$ hypothesis is not rejected', r'$\mathit{H}_0$ hypothesis is rejected']
    text_kwargs = dict(ha='center', va='center', fontsize=14, color='DarkBlue')
    if ntails == 1:
        if left_tailed:
            rejected = not (tcrit <= t_observed)
        else:
            rejected = not (-tcrit >= t_observed)
    else:
        rejected = not (-tcrit >= t_abs)
    ax.text(0, .45, text_responses[int(rejected)], **text_kwargs)

    ### p-value from the exact cdf / survival function rather than an
    ### integral truncated at +/-5.
    if ntails == 1:
        if left_tailed:
            p = stats.t.cdf(t_observed, dof)
        else:
            p = stats.t.sf(t_observed, dof)
    else:
        p = 2 * stats.t.sf(t_abs, dof)

    ax.text(-4, .45, u'\u03B1:{}'.format(alpha))
    ax.text(-4, .435, 'p-value:{}'.format(round(float(p), 2)))

    plt.show()
    
    
    
    
caption = widgets.Label(value='Controls')
ui = widgets.HBox([
    n_of_tails,
    direction,
    alpha_level,
    tcrit_level,
    t_observed,
])
# The t-crit slider is shown but not passed on: distrib() derives the
# critical value from alpha itself.
out = widgets.interactive_output(
    distrib,
    dict(alpha=alpha_level, n=n_of_tails, tcrit_direction=direction, t_observed=t_observed),
)

display(caption, ui, out)




<IPython.core.display.Javascript object>

Label(value='Controls')

HBox(children=(Dropdown(description='tails:', options=(('one-tailed', 1), ('two-tailed', 2)), value=1), Dropdo…

Output()


# The Law of Large Numbers

In [None]:
#@title

import numpy as np
from numpy import random
from IPython.display import clear_output
from ipywidgets import widgets
from IPython.display import display
from traitlets import traitlets 


class LoadedButton(widgets.Button):
    """Button widget that carries an arbitrary payload in a ``value`` trait."""

    def __init__(self, value=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Add a `value` trait whose default is the payload passed in.
        payload_trait = traitlets.Any(value)
        self.add_traits(value=payload_trait)


# The button starts with an empty payload; main() prints a hint while it is empty.
generate = LoadedButton(value=[], description='generate')
sample = widgets.IntSlider(
    min=1,
    max=1000,
    step=1,
    continuous_update=False,
)
ui1 = widgets.HBox([generate, sample])


    
def create_sample(a):
    """Simulate 1000 rolls of a six-sided die and store the running means.

    Parameters
    ----------
    a : object
        The clicked button (any object with a writable ``value`` attribute).
        Its ``value`` is replaced by a list of 1000 floats in which element
        ``i`` is the mean of the first ``i + 1`` rolls.
    """
    n_rolls = 1000
    # numpy's randint excludes the upper bound, so the faces are 1..6.
    # Calling it through `np` keeps that true even when the bare name `random`
    # is bound to the stdlib module (another cell runs `import random`),
    # whose randint(1, 7) would also produce 7.
    rolls = np.random.randint(1, 7, size=n_rolls)
    # Cumulative sum divided by the trial count gives every running mean in
    # one pass instead of re-summing the whole list on each iteration.
    running_means = np.cumsum(rolls) / np.arange(1, n_rolls + 1)

    # Stored as a plain list, as before.
    a.value = running_means.tolist()

def main(n, **kwargs):
    """Plot the first ``n`` running averages stored on the generate button.

    While the button holds no data, a hint is printed instead. The red
    horizontal line is drawn at 3.5. Extra keyword arguments are ignored.
    """
    running_avgs = generate.value
    if len(running_avgs) == 0:
        print("Start the simulation by generating a fresh sample. Move the slider to see the graph.")
        return

    to_plot = running_avgs[0:n]
    n_shown = len(to_plot)

    fig1 = plt.figure(figsize=(20, 10), dpi=300)
    ax3 = plt.subplot(111)

    ax3.plot(to_plot, color='#002456')
    ax3.plot([0, n_shown], [3.5, 3.5], lw=0.8, color='r', alpha=0.7)
    ax3.set_ylim([1, 6])
    ax3.set_xlim([0, n_shown])
    ax3.set_ylabel('average')
    ax3.set_xlabel('number of trials')
    for side in ('top', 'right'):
        ax3.spines[side].set_visible(False)
    ax3.set_title('The Law of Large Numbers')
    plt.show()


# Clicking the button stores fresh data; the plot itself is redrawn only
# when the slider value changes.
generate.on_click(create_sample)
out1 = widgets.interactive_output(main, dict(n=sample))

display(ui1, out1)



<IPython.core.display.Javascript object>

HBox(children=(LoadedButton(description='generate', style=ButtonStyle()), IntSlider(value=1, continuous_update…

Output()

In [None]:
from ipywidgets import widgets
from IPython.display import display
from traitlets import traitlets

class LoadedButton(widgets.Button):
    """A button that can hold a value as an attribute."""

    def __init__(self, value=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Create the value attribute as a trait defaulting to `value`.
        value_trait = traitlets.Any(value)
        self.add_traits(value=value_trait)

def add_num(ex):
    """Increment ``ex.value`` by one and print the new value."""
    ex.value += 1
    print(ex.value)

# Counter demo: the payload starts at 1 and add_num bumps it on every click.
lb = LoadedButton(value=1, description="Loaded")
lb.on_click(add_num)
display(lb)


# Confidence Intervals

A 95% confidence interval is built by a procedure that captures the true population mean in 95% of repeated samples. Sampling error is a measure of accuracy, whereas the margin of error (MOE) is a measure of precision. The larger the sample size, the more precise (narrower) the confidence interval.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import math

# Standard normal parameters and an even grid spanning +/- 3 sigma.
mu = 0
variance = 1
sigma = math.sqrt(variance)
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 1000)
density = stats.norm.pdf(x, mu, sigma)  # evaluated once, reused below

fig = plt.figure(figsize=(20, 10))

ax = plt.subplot(111)
ax.plot(x, density, color='Black')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set_ylim([0, 0.5])
ax.set_xlim([-4, 4])
# Shade the area under the curve directly. The earlier fill_betweenx call,
# with the pdf as the y-coordinate, left the thin strip below pdf(+/-3) unfilled.
ax.fill_between(x, density, color='DarkCyan')
# The curve comes from stats.norm, so it is labelled as a normal distribution.
ax.set_title('Standard normal distribution')

plt.show()

In [None]:
from scipy.stats import t

# Freeze a t distribution with df=540, loc=0, scale=1 and read its first
# four moments (mean, variance, skewness, kurtosis) from stats().
rv = t(540, loc=0, scale=1)
mean, var, skew, kurt = rv.stats(moments='mvsk')
# Not the last expression of the cell, so this tuple is evaluated but not shown.
mean, var, skew, kurt

import numpy as np
from scipy.stats import t
import matplotlib.pyplot as plt
%matplotlib inline

# t distribution with 20 degrees of freedom, evaluated on 100 evenly spaced
# points between its 0.0001 and 0.9999 quantiles.
rv = t(df=20, loc=0, scale=1)
x = np.linspace(start=rv.ppf(0.0001), stop=rv.ppf(0.9999), num=100)
y = rv.pdf(x)

plt.xlim(-5, 5)
plt.plot(x, y)

# NOTE(review): `stats` is not imported in this cell; it has to come from an
# earlier one. The test result is discarded because only the last expression
# of a cell (`len(y)`) is displayed.
stats.ttest_1samp(y, 0)
len(y)

In [None]:
#@title

from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual, IntSlider
import ipywidgets as widgets

def f(x, a, b):
    """Draw the segment from (0, x) to (0, a*x + b) on fixed [-5, 5] axes.

    NOTE(review): both x-coordinates are 0, so the result is a vertical
    segment on the y-axis -- confirm whether the line y = a*x + b was meant.

    Returns whatever ``plt.show()`` returns.
    """
    axis_range = [-5, 5]
    y = a * x + b

    plt.plot([0, 0], [x, y])
    plt.xlim(axis_range)
    plt.ylim(axis_range)
    return plt.show()


# x gets a float slider from -5 to 5 in steps of 0.1; a and b start at 4.
interact(f, x=(-5, 5, 0.1), a=4, b=4);



In [None]:
from IPython.display import display
button = widgets.Button(description="Click Me!")
output = widgets.Output()


def on_button_clicked(b):
    """Print a message into the ``output`` widget; ``b`` is not used."""
    with output:
        print("Button clicked.")


button.on_click(on_button_clicked)
display(button, output)

In [None]:
# Module-level r; inside function() the parameter of the same name shadows it.
r = 24


def function(r, alpha):
    """Return ``r`` unchanged; ``alpha`` is accepted but not used."""
    return r


interact(function, r=13, alpha=34)

In [None]:
caption = widgets.Label(value='The values of slider1 and slider2 are synchronized')
sliders1, slider2 = widgets.IntSlider(description='Slider 1'),\
                    widgets.IntSlider(description='Slider 2')
# Link the two `value` traits so that the sliders move together, as the
# caption states. Previously the `min` trait of slider 1 was tied to the
# value of slider 2, so the values were not synchronized.
l = widgets.link((sliders1, 'value'), (slider2, 'value'))
display(caption, sliders1, slider2)

In [None]:

int_range = widgets.IntSlider()
dr_range = widgets.IntSlider()
output2 = widgets.Output()


def on_value_change(change):
    """Set ``dr_range`` to the current ``int_range`` value plus 4."""
    dr_range.value = int_range.value + 4


def on_value_change1(change):
    """Set ``int_range`` to the current ``dr_range`` value plus 8."""
    int_range.value = dr_range.value + 8


# No `names=` filter is given, and each handler writes to the slider the
# other handler observes, so one change sets off the other handler in turn.
int_range.observe(on_value_change)
dr_range.observe(on_value_change1)

display(int_range, dr_range, output2)


In [None]:
caption = widgets.Label(value='The values of range1 and range2 are synchronized')
slider = widgets.IntSlider(min=-5, max=5, value=1, description='Slider')


def handle_slider_change(change):
    """Update the caption to say whether the new slider value is negative."""
    if change.new < 0:
        sign = 'negative'
    else:
        sign = 'nonnegative'
    caption.value = 'The slider value is ' + sign


# Only changes of the `value` trait reach the handler.
slider.observe(handle_slider_change, names='value')

display(caption, slider)

In [None]:
import math
slider1 = widgets.FloatSlider()
slider4 = widgets.FloatSlider()


def slider1_handler(*args):
    """Write the square of slider4's value into slider1."""
    slider1.value = slider4.value ** 2


def slider4_handler(*args):
    """Write the square root of slider1's value into slider4."""
    slider4.value = math.sqrt(slider1.value)


# Each handler is attached to the slider it reads from, not the one it is
# named after: a change of slider1 updates slider4 and vice versa.
slider1.observe(slider4_handler)
slider4.observe(slider1_handler)

display(slider1, slider4)

In [None]:


from ipywidgets import *

# A unicode escape works as a widget description; u'\u03C3' is the Greek
# small letter sigma.
b1 = Button(description=u'\u03C3') # works
display(b1)

In [None]:
# Powers of two, 2**1 ... 2**100, over the grid 1..100.
xx = np.linspace(1, 100, 100)
yy = np.array([2**exponent for exponent in xx])

# Scale factors for the extra curves and the visible axis window.
multiplier = np.array([3, 5, 10])
xlim = 15
ylim = 2**xlim

def martingale(x):
    """Return 2 raised to the power ``x``."""
    return pow(2, x)

fig3 = plt.figure(figsize=(20, 10), dpi=300)
ax4 = plt.subplot(111)

# Axis window, labels and frame.
ax4.set_xlim([0, xlim])
ax4.set_ylim([0, ylim])
ax4.set_xlabel('количество испытаний')
ax4.set_ylabel('ставка')
for side in ('top', 'right'):
    ax4.spines[side].set_visible(False)

# Base curve first, then one scaled copy per multiplier, then the shaded
# area under the base curve.
ax4.plot(xx, yy)
for k in multiplier:
    ax4.plot(xx, yy*k)
ax4.fill_between(xx, yy, alpha = 0.3)

plt.show()

