# Dispersion 

In [None]:
import matplotlib 
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np 

In [None]:
import seaborn as sns
# Apply seaborn's default theme. color_codes=True additionally remaps
# matplotlib's single-letter colour shorthands (e.g. "r", "g") to the
# seaborn palette.
sns.set(color_codes=True)
# Must come after sns.set(): switches the theme's axes style to plain "white".
sns.set_style("white")     # See more styling options here: https://seaborn.pydata.org/tutorial/aesthetics.html

In [None]:
# Draw one float uniformly at random from the half-open interval [0.0, 1.0).
# Left as a bare expression so the notebook displays the value.
np.random.uniform(low=0.0, high=1.0)      

In [None]:
# Draw 10 independent values from the same uniform distribution on [0.0, 1.0).
np.random.uniform(low=0.0, high=1.0, size = 10)
# Every value in the range is equally likely, so the draws spread out evenly --
# sort of like [ 0.0  0.2  0.4  0.6  0.8  1.0 ]

## Generating a LOT of numbers 

In [None]:
# Single named sample size shared by every draw below (no 'magic numbers').
num_samples = 10_000

In [None]:
# `num_samples` values spread evenly over the half-open interval [0.0, 10.0).
uniform_vals = np.random.uniform(
    low=0.0,
    high=10.0,
    size=num_samples,
)

In [None]:
# Histogram of the uniform sample: raw counts in 20 bins, no KDE curve.
sns.distplot(uniform_vals, bins=20, kde=False)
plt.ylabel('Frequency')
plt.xlabel('Value')
plt.title("Uniform Values")  # fixed typo: was "Unform Values"
sns.despine(offset=10, trim=True)  # move axes away
plt.show()

In [None]:
# Normal sample centred on `loc` with standard deviation `scale`.
# Values cluster near `loc` -- sort of like [ 1 5 5 5 5 7 ]
normal_vals = np.random.normal(
    loc=5.0,
    scale=10.0,
    size=num_samples,
)

In [None]:
# Histogram of the normal sample: raw counts in 20 bins, no KDE curve.
sns.distplot(normal_vals, kde=False, bins=20)
plt.title("Normal Values")
plt.xlabel("Value")
plt.ylabel("Frequency")
sns.despine(trim=True, offset=10)  # push the axes away from the bars
plt.show()

In [None]:
# Central tendency of each sample.
print("Uniform vals mean:", uniform_vals.mean())
print("Normal vals mean: ", normal_vals.mean())

In [None]:
# Spread of each sample, as variance (numpy default ddof=0).
print("Uniform vals variance:", uniform_vals.var())
print("Normal vals variance: ", normal_vals.var())

In [None]:
# Spread of each sample, as standard deviation (square root of the variance).
print("Uniform vals sd:", uniform_vals.std())
print("Normal vals sd: ", normal_vals.std())

In [None]:
# Draw a fresh normal sample and keep its mean / SD for the annotated plot.
normal_vals = np.random.normal(size=num_samples, loc=5.0, scale=10.0)

nv_sd = normal_vals.std()
nv_mean = normal_vals.mean()

In [None]:
# Histogram of the normal sample with its mean and +/- 1 SD marked.
sns.distplot(normal_vals, bins=20, kde=False)
# plt.xlim(-60, 60)   # uncomment to pin the x-range

plt.ylabel('Frequency')
plt.xlabel('Value')
plt.title("Normal Values")
sns.despine(offset=10, trim=True)  # move axes away

# Mark the mean (green) and one standard deviation either side of it (red).
# axvline spans the full height of the axes, so the markers no longer rely
# on a hard-coded bar height (1500) that only suited one combination of
# num_samples and bin count.
plt.axvline(nv_mean, color='green', linewidth=2)
plt.axvline(nv_mean + nv_sd, color='red', linewidth=2)
plt.axvline(nv_mean - nv_sd, color='red', linewidth=2)

plt.show()

In [None]:
# Second normal sample: same centre (5.0) but a wider spread (SD 15).
normal_vals_2 = np.random.normal(size=num_samples, loc=5.0, scale=15.0)
nv_sd_2 = normal_vals_2.std()
nv_mean_2 = normal_vals_2.mean()

In [None]:
# Histogram of the wider normal sample with its mean and +/- 1 SD marked.
sns.distplot(normal_vals_2, bins=20, kde=False)
# plt.xlim(-60, 60)   # uncomment to pin the x-range

plt.ylabel('Frequency')
plt.xlabel('Value')
plt.title("Normal Values")
sns.despine(offset=10, trim=True)  # move axes away

# Mark the mean (green) and one standard deviation either side of it (red).
# axvline spans the full height of the axes, so the markers no longer rely
# on a hard-coded bar height (1500) that only suited one combination of
# num_samples and bin count.
plt.axvline(nv_mean_2, color='green', linewidth=2)
plt.axvline(nv_mean_2 + nv_sd_2, color='red', linewidth=2)
plt.axvline(nv_mean_2 - nv_sd_2, color='red', linewidth=2)

plt.show()

In [None]:
# Side-by-side comparison: redraw both samples with the same centre (5.0)
# and different spreads (SD 10 vs SD 15).
normal_vals = np.random.normal(size=num_samples, loc=5.0, scale=10.0)
normal_vals_2 = np.random.normal(size=num_samples, loc=5.0, scale=15.0)

def plot_dist(vals, label, bins=20, xlim=(-60, 60)):
    """Plot a histogram of ``vals`` with its mean and +/- 1 SD marked.

    The mean is drawn as a green vertical line and the points one standard
    deviation below and above it as red vertical lines. The figure is shown
    immediately via ``plt.show()``.

    Args:
        vals: Array-like of numeric samples to plot.
        label: Text used as the plot title.
        bins: Number of histogram bins (default 20).
        xlim: ``(left, right)`` x-axis limits. Keeping this fixed across
            calls makes the spread of different samples comparable by eye.
    """
    vals_mean = np.mean(vals)
    vals_sd = np.std(vals)

    sns.distplot(vals, bins=bins, kde=False)

    plt.xlim(*xlim)

    plt.ylabel('Frequency')
    plt.xlabel('Value')
    plt.title(label)
    sns.despine(offset=10, trim=True)  # move axes away

    # axvline spans the full height of the axes, so the markers do not rely
    # on a hard-coded bar height (previously 1500) that only suited one
    # combination of sample size and bin count.
    plt.axvline(vals_mean, color='green', linewidth=1)
    plt.axvline(vals_mean + vals_sd, color='red', linewidth=1)
    plt.axvline(vals_mean - vals_sd, color='red', linewidth=1)

    plt.show()


    
# Narrow sample first, then the wide one, on identical x-axes.
plot_dist(vals=normal_vals, label="Less Variant")
plot_dist(vals=normal_vals_2, label="More Variant")
