# Distributions, Contours and Everything Else

In [None]:
import numpy as np
# Seed NumPy's global random generator so the np.random draws made later in the notebook are repeatable.
np.random.seed(1337)

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's 'ggplot' style sheet to the figures drawn afterwards.
plt.style.use('ggplot')

In [None]:
num_bins = 5  # number of bins in each dimension  Try: 5, 20, 50, 500
num_samples = 10000000

# Draw num_samples 2-d points; each coordinate is an independent normal with mean 100 and std 10.
data = 100 + 10 * np.random.randn(num_samples, 2)
joint_probs, edges = np.histogramdd(data, bins=num_bins)

# np.histogramdd indexes the counts as [x_bin, y_bin], but the cells that consume
# joint_probs (np.meshgrid(X, Y) with its default 'xy' indexing, and the slices that
# are plotted against edges[0] / edges[1]) treat rows as y and columns as x.
# Transpose once here so that joint_probs[y_bin, x_bin] matches those consumers.
joint_probs = joint_probs.T

# Keep only the right-hand edge of every bin: one position per bin, shape (2, num_bins).
edges = np.array(edges)[:, 1:]

In [None]:
# Per-dimension bin edges; the leftmost edge of each dimension was dropped when edges was built.
print(edges) 

In [None]:
# One axis per data dimension, with num_bins bins along each.
joint_probs.shape

In [None]:
# Raw per-bin sample counts from the histogram (normalisation happens in later cells).
joint_probs

In [None]:
# Bin index at which the joint histogram is sliced along each axis.
cut_point = 2


# Slice at index cut_point along the first axis, rescaled in place so its entries sum to 1.
X_vals = np.array( joint_probs[cut_point] )     
X_vals /= X_vals.sum() 

# Brain twister: why do we need the np.array(...) call here?
# Hint: leaving it out leads to a HUGE bug in the analysis. Can you figure out why?

# Try to figure it out. If you can't, post in the Q&A and we will discuss.

# Slice at index cut_point along the second axis, rescaled the same way.
Y_vals = np.array( joint_probs[:, cut_point]  ) 
Y_vals /= Y_vals.sum()

print("Cut X and Y on: ", cut_point)

In [None]:
# Show the first dimension's bin positions followed by the two normalised slices.
print(edges[0])
print(X_vals)
print(Y_vals)

In [None]:
# Bar chart of the normalised X_vals slice, one bar per bin position in edges[0].
plt.bar(edges[0], X_vals)
plt.title("Distribution of X_vals")
plt.xlabel('Num')
plt.ylabel('Frequency')
plt.show()

In [None]:
# Bar chart of the normalised Y_vals slice, one bar per bin position in edges[1].
plt.bar(edges[1], Y_vals)
plt.title("Distribution of Y_vals")
plt.xlabel('Num')
plt.ylabel('Frequency')
plt.show()

In [None]:
# Expand the per-dimension bin positions into 2-D coordinate grids for the plots below.
X, Y = np.meshgrid(edges[0], edges[1])

# Rescale the counts so that all cells together sum to 1.
Z = joint_probs / joint_probs.sum()

In [None]:
# 3-D surface of the normalised histogram Z over the (X, Y) grid.
fig = plt.figure()
ax = plt.axes(projection='3d')

ax.plot_surface(X, Y, Z, cmap='viridis', rstride=1, cstride=1)

ax.set_zlabel('z')
ax.set_ylabel('y')
ax.set_xlabel('x')
# Camera position: elevation 45 degrees, azimuth 30 degrees.
ax.view_init(45, 30)

In [None]:
# Filled contour view of the same normalised histogram, with a colour scale.
fig, ax = plt.subplots()
cp = ax.contourf(X, Y, Z)
ax.set_title('Contour Plot')
ax.set_xlabel('X')
ax.set_ylabel('Y')
fig.colorbar(cp)  # Add a colorbar to the plot
plt.show()

## Dependence in Joints

In [None]:
import numpy as np

num_bins = 50
num_samples = 10000

# Don't have to understand exactly how these values are being generated
mean = [0, 0]
cov = [[1, 0.8],
       [0.8, 1.0]]  # unit variances with covariance 0.8 (NOT diagonal): the two coordinates are correlated

# Correlated 2-d normal draws, scaled by 10 and shifted to be centred on 100.
data = 100 + 10 * np.random.multivariate_normal(mean, cov, num_samples)
joint_probs, edges = np.histogramdd(data, bins=num_bins)

# np.histogramdd indexes the counts as [x_bin, y_bin], but the cells that consume
# joint_probs (np.meshgrid(X, Y) with its default 'xy' indexing, and the slices that
# are plotted against edges[0] / edges[1]) treat rows as y and columns as x.
# Transpose once here so that joint_probs[y_bin, x_bin] matches those consumers.
joint_probs = joint_probs.T

# Keep only the right-hand edge of every bin: one position per bin, shape (2, num_bins).
edges = np.array(edges)[:, 1:]
print(joint_probs)

In [None]:
cut_point = 14    # try 9, 2 and 14

# Take the slice at index cut_point along each axis as its own array,
# then rescale each one in place so that its entries sum to 1.
Y_vals = np.array(joint_probs[:, cut_point])
Y_vals /= Y_vals.sum()

X_vals = np.array(joint_probs[cut_point, :])
X_vals /= X_vals.sum()

print("Cut X and Y on: ", cut_point)

In [None]:
# Bar chart of the normalised X_vals slice, one bar per bin position in edges[0].
plt.bar(edges[0], X_vals)
plt.title("Distribution of X_vals")
plt.xlabel('Num')
plt.ylabel('Frequency')
plt.show()

In [None]:
# plt.bar(edges[1], Y_vals)   #       Let's not do this for Y right now 
# plt.ylabel('Frequency')
# plt.xlabel('Num');
# plt.title("Distribution of Y_vals")
# plt.show()

In [None]:
# Expand the per-dimension bin positions into 2-D coordinate grids for the plots below.
X, Y = np.meshgrid(edges[0], edges[1])

# Rescale the counts so that all cells together sum to 1.
Z = joint_probs / joint_probs.sum()

In [None]:
# 3-D surface of the normalised histogram Z over the (X, Y) grid, on a 9x6 inch figure.
fig = plt.figure(figsize=(9, 6))
ax = plt.axes(projection='3d')

ax.plot_surface(X, Y, Z, cmap='viridis', rstride=1, cstride=1)

ax.set_zlabel('z')
ax.set_ylabel('y')
ax.set_xlabel('x')
# Camera position: elevation 60 degrees, azimuth 75 degrees.
ax.view_init(60, 75)

In [None]:
# Filled contour view of the same normalised histogram, with a colour scale.
fig, ax = plt.subplots()
cp = ax.contourf(X, Y, Z)
ax.set_title('Contour Plot')
ax.set_xlabel('X')
ax.set_ylabel('Y')
fig.colorbar(cp)  # Add a colorbar to the plot
plt.show()