In [None]:
# Import some basic libraries
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_context('paper')

# Hands-On Activity 5.1: The Uniform Distribution

## Objectives

+ To practice with the uniform distribution.

## The uniform distribution

The uniform distribution is the most common continuous distribution.
It corresponds to a random variable that is equally likely to take a value within a given interval.
We write:
$$
X\sim U([0,1]),
$$
and we read this as "$X$ follows a uniform distribution taking values in $[0,1]$."

The probability density of the uniform is constant in $[0,1]$ and zero outside it.
We have:
$$
p(x) := U(x|[0,1]) := \begin{cases}
1,&\;0\le x \le 1,\\
0,&\;\text{otherwise}.
\end{cases}
$$

The cumulative distribution function of the uniform is, for $x \in [0,1]$:
$$
F(x) = p(X \le x) = \int_0^x p(u) du = \int_0^x du = x.
$$
Obviously, we have $F(x) = 0$ for $x < 0$ and $F(x) = 1$ for $x > 1$.

The probability that $X$ takes values in $[a,b]$ for $a < b$ in $[0,1]$ is:
$$
p(a \le X \le b) = F(b) - F(a) = b - a.
$$

The expectation of the uniform is:
$$
\mathbb{E}[X] = \int_0^1 xdx = \frac{1}{2}.
$$

The variance of the uniform is:
$$
\mathbb{V}[X] = \mathbb{E}[X^2] - \left(\mathbb{E}[X]\right)^2 = \frac{1}{3} - \frac{1}{4} = \frac{1}{12}.
$$

In [None]:
# scipy.stats supplies the distribution object. Called with no arguments,
# st.uniform() is the standard uniform on [0, 1].
import scipy.stats as st
X = st.uniform()

# Evaluation grid: 100 points reaching slightly past both ends of [0, 1],
# so the plot also shows the region where the density is zero.
xs = np.linspace(-0.1, 1.1, num=100)

# Probability density function (the CDF is plotted in the next cell)
fig, ax = plt.subplots(dpi=150)
ax.plot(xs, X.pdf(xs), linewidth=2)
ax.set_xlabel('$x$')
ax.set_ylabel('$p(x)$')

In [None]:
# Cumulative distribution function F(x), evaluated on the same grid xs
# that was used for the PDF plot.
fig, ax = plt.subplots(dpi=150)
ax.set_xlabel('$x$')
ax.plot(xs, X.cdf(xs), linewidth=2)
ax.set_ylabel('$F(x)$')

In [None]:
# Expected value E[X] as computed by X.expect(), printed with two decimals
expected_value = X.expect()
print('E[X] = {0:1.2f}'.format(expected_value))

In [None]:
# Variance V[X] as computed by X.var(), printed with two decimals
variance = X.var()
print('V[X] = {0:1.2f}'.format(variance))

In [None]:
# Draw random samples from X; the resulting array is shown as the cell output
num_samples = 100
X.rvs(size=num_samples)

In [None]:
# The same kind of draw can be made with numpy directly:
# np.random.rand samples from [0, 1)
num_samples = 100
np.random.rand(num_samples)

In [None]:
# Probability that X lands in [a, b], obtained from the CDF as F(b) - F(a).
# Note: the left bound a = -1.0 lies below the support [0, 1]; the CDF is
# zero there, so the result reduces to F(b).
a, b = -1.0, 0.3
prob_X_is_in_ab = X.cdf(b) - X.cdf(a)

# Report the bounds and the probability with two decimals each
message = 'p({0:1.2f} <= X <= {1:1.2f}) = {2:1.2f}'
print(message.format(a, b, prob_X_is_in_ab))

## The uniform distribution over an arbitrary interval $[a, b]$

The uniform distribution can also be defined over an arbitrary interval $[a,b]$.
We write:
$$
X \sim U([a, b]).
$$

The PDF of this random variable is:
$$
p(x) = \begin{cases}
c,&\;x\in[a,b],\\
0,&\;\text{otherwise},
\end{cases}
$$
where $c$ is a positive constant.
This simply tells us that the probability density of finding $X$ in $[a,b]$ is something positive and that the probability density of finding $X$ outside $[a,b]$ is exactly zero.
The positive constant $c$ is determined by imposing the normalization condition:
$$
\int_{-\infty}^{+\infty}p(x)dx = 1.
$$
This gives:
$$
1 = \int_{-\infty}^{+\infty}p(x)dx = \int_a^bc dx = c \int_a^bdx = c (b-a).
$$
From this we get:
$$
c = \frac{1}{b - a},
$$
and we can now write:
$$
p(x) = \begin{cases}
\frac{1}{b-a},&x \in [a, b],\\
0,&\;\text{otherwise}.
\end{cases}
$$
From the PDF, we can now find the CDF for $x \in [a,b]$:
$$
F(x) = p(X\le x) = \int_{-\infty}^x p(u)du = \int_a^x \frac{1}{b-a}du = \frac{1}{b-a}\int_a^xdu = \frac{x-a}{b-a}.
$$
The expectation is:
$$
\mathbb{E}[X] = \frac{1}{2}(a+b),
$$
and the variance is:
$$
\mathbb{V}[X] = \frac{1}{12}(b-a)^2.
$$

This is how you can do this using ``scipy.stats``:

In [None]:
# End points of the interval [a, b]
a, b = -2.0, 5.0

# scipy parameterizes the uniform by its left end point (loc) and its
# width (scale), so U([a, b]) corresponds to loc=a, scale=b - a.
X = st.uniform(loc=a, scale=b - a)

# Evaluation grid extending 0.1 beyond each end point
xs = np.linspace(a - 0.1, b + 0.1, num=100)

# Plot the PDF
fig, ax = plt.subplots(dpi=150)
ax.plot(xs, X.pdf(xs), linewidth=2)
ax.set_xlabel('$x$')
ax.set_ylabel('$p(x)$')

In [None]:
# CDF of U([a, b]) on a grid extending 0.1 beyond each end point
xs = np.linspace(a - 0.1, b + 0.1, num=100)
fig, ax = plt.subplots(dpi=150)
ax.set_xlabel('$x$')
ax.plot(xs, X.cdf(xs), linewidth=2)
ax.set_ylabel('$F(x)$')

In [None]:
# Expected value E[X] as computed by X.expect(), printed with two decimals
expected_value = X.expect()
print('E[X] = {0:1.2f}'.format(expected_value))

In [None]:
# Variance V[X] as computed by X.var(), printed with two decimals
variance = X.var()
print('V[X] = {0:1.2f}'.format(variance))

In [None]:
# Draw random samples from X; the resulting array is shown as the cell output
num_samples = 100
X.rvs(size=num_samples)

There is another way to obtain samples from $U([a,b])$ that uses only samples from $U([0,1])$.
Here is how. Let $Z$ be a standard uniform random variable:
$$
Z\sim U([0,1]).
$$
Then define the random variable:
$$
X = a + (b-a) Z.
$$
Then, $X\sim U([a,b])$. Why? Well, let's just show that the CDF of $X$ has the right form:
$$
p(X \le x) = p(a + (b-a)Z \le x) = p((b-a)Z \le x - a) = p\left(Z \le \frac{x-a}{b-a}\right) = \frac{x-a}{b-a},
$$
where the division by $b-a$ preserves the inequality because $b > a$, and the last step follows from the fact that the CDF of $Z$ is simply $p(Z \le z) = z$ for $z \in [0,1]$.
Equipped with this result, we see that we can sample $X$ by sampling $Z$ and then scaling it appropriately (by the way this is what ``scipy.stats`` is doing internally). Here it is using ``numpy.random.rand`` to sample in $[0,1]$:

In [None]:
# Sample Z ~ U([0, 1]) with numpy, then shift and scale: X = a + (b - a) Z
z_samples = np.random.rand(1000)
x_samples = a + (b - a) * z_samples
print(x_samples)

In [None]:
# Compare a normalized histogram of x_samples with the PDF of X to check
# that the scaled samples are distributed the right way.
fig, ax = plt.subplots(dpi=150)
# density=True normalizes the histogram so it is on the same scale as the PDF
ax.hist(x_samples, density=True, alpha=0.25, label='Histogram')
ax.plot(xs, X.pdf(xs), lw=2, label='True PDF')
ax.set_xlabel('$x$')
ax.set_ylabel('$p(x)$')
# The label= arguments above are only displayed once a legend is drawn;
# the trailing semicolon suppresses the Legend repr in the cell output.
ax.legend(loc='best');

## Questions

+ Repeat the code above so that the random variable is $U([1, 10])$.