In [None]:
import matplotlib.pyplot as plt

In [None]:
import numpy as np

In [None]:
# Use this if you want percentages on y-axis
from matplotlib.ticker import PercentFormatter

# Histograms

We often work with raw data when creating histograms. If we are given a frequency or relative frequency table, we can create a bar graph and label the bars with numeric values.

In `matplotlib.pyplot`, the classes are referred to as **bins**.

Below is a possible list of the dog weights introduced at the beginning of this section's notes. *Note*: The data values do not need to be in any order.

In [None]:
# Raw data: the weights of the 25 dogs (order does not matter).
dog_weights = [
    37, 39, 41, 42, 43, 43,
    45, 45, 46, 46, 47, 47,
    47, 48, 48, 50, 50, 51,
    51, 52, 53, 53, 54, 57, 62
]

# Bin edges: every value but the last is a lower class limit; the final 65
# is the right edge that closes the last class.
bins = [35, 40, 45, 50, 55, 60, 65]

# Draw the frequency histogram; edgecolor outlines each bar.
fig, ax = plt.subplots()
ax.hist(dog_weights, bins=bins, edgecolor='black')
ax.set_xlabel("Dog Weights")
ax.set_ylabel("Frequency")
ax.set_title("Histogram of Dog Weights")

plt.show()

We can generate the same histogram using a predetermined class width (5 in this case).

In [None]:
# Raw data: the weights of the 25 dogs (order does not matter).
dog_weights = [
    37, 39, 41, 42, 43, 43,
    45, 45, 46, 46, 47, 47,
    47, 48, 48, 50, 50, 51,
    51, 52, 53, 53, 54, 57, 62
]

num_bins = 6  # adjust this number based on how many bins you want
lcl_1 = 35  # the lower class limit of the first class
class_width = 5
# Generate num_bins + 1 evenly spaced bin edges: the lower class limit of
# each class plus the right edge that closes the last class.
bins = [lcl_1 + class_width*i for i in range(num_bins+1)]

# Draw the frequency histogram; edgecolor outlines each bar.
fig, ax = plt.subplots()
ax.hist(dog_weights, bins=bins, edgecolor='black')
ax.set_xlabel("Dog Weights")
ax.set_ylabel("Frequency")
ax.set_title("Histogram of Dog Weights")

plt.show()

A few things of note moving forward:

* We will usually have many more bins in our histograms than we did in the examples in this section.
* The `edgecolor = 'black'` can be omitted; I, personally, like to include it.

## Notes: Example 2

In [None]:
# Raw data for Example 2 (25 measurements ranging from 1 to 10).
measurements = [
    9,2,10,1,4,
    5,1,6,7,4,
    6,5,4,8,10,
    3,1,2,3,9,
    8,6,1,1,10
]

num_bins = 6
lcl_1 = np.min(measurements)  # start the first class at the smallest value
class_width = 2
# Bin edges: lcl_1, lcl_1 + 2, ..., lcl_1 + 12 (num_bins + 1 edges in total).
# NOTE(review): with this data the edges are 1, 3, ..., 13, so the last class
# (11 to 13) contains no values -- confirm that 6 classes is intended.
bins = [lcl_1 + class_width*i for i in range(num_bins+1)]

# Draw the frequency histogram; edgecolor outlines each bar.
fig, ax = plt.subplots()
ax.hist(measurements, bins=bins, edgecolor='black')
ax.set_xlabel("Measurements")
ax.set_ylabel("Frequency")
ax.set_title("Histogram of Example 2")

plt.show()

# Relative Frequency Histograms

In [None]:
# Raw data: the weights of the 25 dogs (order does not matter).
dog_weights = [
    37, 39, 41, 42, 43, 43,
    45, 45, 46, 46, 47, 47,
    47, 48, 48, 50, 50, 51,
    51, 52, 53, 53, 54, 57, 62
]

# Give every observation a weight of 1/n so that each bar's height is the
# fraction of the data falling in that class (the heights sum to 1).
rel_weights = np.ones_like(dog_weights)/len(dog_weights)
# Bin edges: lower class limits plus the right edge of the last class.
bins = [35, 40, 45, 50, 55, 60, 65]

fig, ax = plt.subplots()
ax.hist(dog_weights, bins=bins, edgecolor='black', weights=rel_weights)
ax.set_xlabel("Dog Weights")
ax.set_ylabel("Relative Frequency")
ax.set_title("Histogram of Dog Weights")

plt.show()

## Relative Frequency Histograms with Percent Y-Axis

The following will convert the y-axis on the chart above to percents. Simply add the following information:

* Include the `100 *` in the `rel_weights` calculation.
* Include the `ax.yaxis.set_major_formatter(PercentFormatter())` before `plt.show()`

In [None]:
# Raw data: the weights of the 25 dogs (order does not matter).
dog_weights = [
    37, 39, 41, 42, 43, 43,
    45, 45, 46, 46, 47, 47,
    47, 48, 48, 50, 50, 51,
    51, 52, 53, 53, 54, 57, 62
]

# Give every observation a weight of 100/n so that each bar's height is the
# percentage of the data falling in that class (the heights sum to 100).
# The divisor must be the number of data values, len(dog_weights).
rel_weights = 100 * np.ones_like(dog_weights)/len(dog_weights)
# Bin edges: lower class limits plus the right edge of the last class.
bins = [35, 40, 45, 50, 55, 60, 65]

fig, ax = plt.subplots()
ax.hist(dog_weights, bins=bins, edgecolor='black', weights=rel_weights)
ax.set_xlabel("Dog Weights")
ax.set_ylabel("Relative Frequency")
ax.set_title("Histogram of Dog Weights")
ax.yaxis.set_major_formatter(PercentFormatter())  # this is what you need

plt.show()

# Density Histograms

This is incredibly simple. All we have to do is include `density=True` in the call to `ax.hist`.

The following is the density histogram for the dog weights.

In [None]:
# Raw data: the weights of the 25 dogs (order does not matter).
dog_weights = [
    37, 39, 41, 42, 43, 43,
    45, 45, 46, 46, 47, 47,
    47, 48, 48, 50, 50, 51,
    51, 52, 53, 53, 54, 57, 62
]

num_bins = 6  # adjust this number based on how many bins you want
lcl_1 = 35  # the lower class limit of the first class
class_width = 5
# Generate num_bins + 1 evenly spaced bin edges: the lower class limit of
# each class plus the right edge that closes the last class.
bins = [lcl_1 + class_width*i for i in range(num_bins+1)]

fig, ax = plt.subplots()
# density=True is the only change needed to get a density histogram.
ax.hist(dog_weights, bins=bins, edgecolor='black', density=True)
ax.set_xlabel("Dog Weights")
ax.set_ylabel("Density")
ax.set_title("Density Histogram of Dog Weights")

plt.show()

# Cumulative Histograms

Like density histograms, cumulative histograms are easy to make using matplotlib. Typically, the more often a feature is needed in real-world work, the easier library authors try to make it.

For cumulative histograms, all you have to do is include `cumulative = True` in the `ax.hist` method.

The following is a cumulative histogram for the example of the 25 dogs' weights.

In [None]:
# Raw data: the weights of the 25 dogs (order does not matter).
dog_weights = [
    37, 39, 41, 42, 43, 43,
    45, 45, 46, 46, 47, 47,
    47, 48, 48, 50, 50, 51,
    51, 52, 53, 53, 54, 57, 62
]

# Bin edges: lower class limits plus the right edge of the last class.
bins = [35, 40, 45, 50, 55, 60, 65]

fig, ax = plt.subplots()
# include cumulative=True to get cumulative histogram
ax.hist(dog_weights, bins=bins, edgecolor='black', cumulative=True)
ax.set_xlabel("Dog Weights")
ax.set_ylabel("Cumulative Frequency")
ax.set_title("Cumulative Histogram of Dog Weights")

plt.show()

In [None]:
# Raw data: the weights of the 25 dogs (order does not matter).
dog_weights = [
    37, 39, 41, 42, 43, 43,
    45, 45, 46, 46, 47, 47,
    47, 48, 48, 50, 50, 51,
    51, 52, 53, 53, 54, 57, 62
]

# Bin edges: lower class limits plus the right edge of the last class.
bins = [35, 40, 45, 50, 55, 60, 65]

fig, ax = plt.subplots()
# Combine cumulative=True with density=True to get a cumulative histogram
# that is normalized so the last bar has height 1.
ax.hist(
    dog_weights,
    bins=bins,
    edgecolor='black',
    cumulative=True,
    density=True
)
ax.set_xlabel("Dog Weights")
ax.set_ylabel("Cumulative Relative Frequency")
ax.set_title("Cumulative Relative Frequency Histogram of Dog Weights")

plt.show()

# Addendum: Free Alternative to Matplotlib

While matplotlib is capable of doing what we need it to do, there are other libraries that we can use to display our histograms. They may make it easier to get what we want.

One of those libraries is Seaborn.

In [None]:
# We have to import seaborn like we do with matplotlib
import seaborn as sns

In [None]:
# Select seaborn's white-grid look and pastel color palette as the plot theme.
sns.set_theme(palette="pastel", style="whitegrid")

## Frequency Histogram with Seaborn

In [None]:
# Raw data: the weights of the 25 dogs.
dog_weights = [
    37, 39, 41, 42, 43,
    43, 45, 45, 46, 46,
    47, 47, 47, 48, 48,
    50, 50, 51, 51, 52,
    53, 53, 54, 57, 62,
]

# Bin edges shared by every class (width 5, from 35 to 65).
bins = [35, 40, 45, 50, 55, 60, 65]

fig, ax = plt.subplots()
# stat='count' puts the raw frequency of each class on the y-axis.
sns.histplot(dog_weights, bins=bins, stat='count', ax=ax)
ax.set(title="Frequency Histogram of Dog Weights", xlabel="Weights")

plt.show()

## Relative Frequency Histogram with Seaborn

In [None]:
# Raw data: the weights of the 25 dogs (order does not matter).
dog_weights = [
    37, 39, 41, 42, 43, 43,
    45, 45, 46, 46, 47, 47,
    47, 48, 48, 50, 50, 51,
    51, 52, 53, 53, 54, 57, 62
]

# Bin edges: lower class limits plus the right edge of the last class.
bins = [35, 40, 45, 50, 55, 60, 65]

fig, ax = plt.subplots()
# stat='proportion' scales the bars so their heights sum to 1.
sns.histplot(
    data=dog_weights,
    bins=bins,
    stat='proportion'
)

# The title names the statistic actually plotted (relative frequency).
ax.set_title("Relative Frequency Histogram of Dog Weights")
ax.set_xlabel("Weights")
plt.show()

## Percent Relative Frequency Histogram with Seaborn

In [None]:
# Raw data: the weights of the 25 dogs.
dog_weights = [
    37, 39, 41, 42, 43,
    43, 45, 45, 46, 46,
    47, 47, 47, 48, 48,
    50, 50, 51, 51, 52,
    53, 53, 54, 57, 62,
]

# Bin edges shared by every class (width 5, from 35 to 65).
bins = [35, 40, 45, 50, 55, 60, 65]

fig, ax = plt.subplots()
# stat='percent' scales the bars so their heights sum to 100.
sns.histplot(dog_weights, bins=bins, stat='percent', ax=ax)
ax.set(
    title="Percent Relative Frequency Histogram of Dog Weights",
    xlabel="Weights",
)

plt.show()

## Density Histogram with Seaborn

In [None]:
# Raw data: the weights of the 25 dogs.
dog_weights = [
    37, 39, 41, 42, 43,
    43, 45, 45, 46, 46,
    47, 47, 47, 48, 48,
    50, 50, 51, 51, 52,
    53, 53, 54, 57, 62,
]

# Bin edges shared by every class (width 5, from 35 to 65).
bins = [35, 40, 45, 50, 55, 60, 65]

fig, ax = plt.subplots()
# stat='density' normalizes the histogram so its total area is 1.
sns.histplot(dog_weights, bins=bins, stat='density', ax=ax)
ax.set(title="Density Histogram of Dog Weights", xlabel="Weights")

plt.show()

## Cumulative Frequency and Relative Frequency Histograms with Seaborn

We just need to add the `cumulative=True` to our `sns.histplot` method.

In [None]:
# Raw data: the weights of the 25 dogs.
dog_weights = [
    37, 39, 41, 42, 43,
    43, 45, 45, 46, 46,
    47, 47, 47, 48, 48,
    50, 50, 51, 51, 52,
    53, 53, 54, 57, 62,
]

# Bin edges shared by every class (width 5, from 35 to 65).
bins = [35, 40, 45, 50, 55, 60, 65]

fig, ax = plt.subplots()
# cumulative=True makes each bar the running total of the counts so far.
sns.histplot(dog_weights, bins=bins, stat='count', cumulative=True, ax=ax)
ax.set(
    title="Cumulative Frequency Histogram of Dog Weights",
    xlabel="Weights",
)

plt.show()

In [None]:
# Raw data: the weights of the 25 dogs.
dog_weights = [
    37, 39, 41, 42, 43,
    43, 45, 45, 46, 46,
    47, 47, 47, 48, 48,
    50, 50, 51, 51, 52,
    53, 53, 54, 57, 62,
]

# Bin edges shared by every class (width 5, from 35 to 65).
bins = [35, 40, 45, 50, 55, 60, 65]

fig, ax = plt.subplots()
# stat='proportion' with cumulative=True accumulates the relative frequencies.
sns.histplot(
    dog_weights, bins=bins, stat='proportion', cumulative=True, ax=ax
)
ax.set(
    title="Cumulative Relative Frequency Histogram of Dog Weights",
    xlabel="Weights",
)

plt.show()

# Plotly Express for Histograms

Plotly express allows you to create interactive visual displays of data. 

You simply have to hover over a bar to get the information needed.
You can also hide the entire plot by clicking on the color square in the legend.

**Plotly express is at its best when working with dataframes**, which you can think of as data tables or spreadsheets.

In [None]:
import plotly.express as px

## Frequency Histogram with Plotly Express

In [None]:
# Raw data: the weights of the 25 dogs.
dog_weights = [
    37, 39, 41, 42, 43,
    43, 45, 45, 46, 46,
    47, 47, 47, 48, 48,
    50, 50, 51, 51, 52,
    53, 53, 54, 57, 62,
]

# Class edges, kept for reference; the px.histogram call below does not
# use this list and is configured through nbins instead.
bins = [35, 40, 45, 50, 55, 60, 65]

# Build the interactive figure first, then label its axes.
hist_fig = px.histogram(
    dog_weights,
    title="Frequency Histogram of Dog Weights",
    nbins=6,  # set the number of bins
)
hist_fig.update_layout(yaxis_title="Frequency", xaxis_title="Dog Weights")

## Relative Frequency Histogram with Plotly Express

In [None]:
# Raw data: the weights of the 25 dogs.
dog_weights = [
    37, 39, 41, 42, 43,
    43, 45, 45, 46, 46,
    47, 47, 47, 48, 48,
    50, 50, 51, 51, 52,
    53, 53, 54, 57, 62,
]

# Class edges, kept for reference; the px.histogram call below does not
# use this list and is configured through nbins instead.
bins = [35, 40, 45, 50, 55, 60, 65]

# Build the interactive figure first, then label its axes.
hist_fig = px.histogram(
    dog_weights,
    histnorm='probability',  # use 'probability' for relative frequency histogram
    title="Relative Frequency Histogram of Dog Weights",
    nbins=6,  # set the number of bins
)
hist_fig.update_layout(
    yaxis_title="Relative Frequency", xaxis_title="Dog Weights"
)

## Relative Frequency Histogram Using Percents with Plotly Express

In [None]:
# Raw data: the weights of the 25 dogs.
dog_weights = [
    37, 39, 41, 42, 43,
    43, 45, 45, 46, 46,
    47, 47, 47, 48, 48,
    50, 50, 51, 51, 52,
    53, 53, 54, 57, 62,
]

# Class edges, kept for reference; the px.histogram call below does not
# use this list and is configured through nbins instead.
bins = [35, 40, 45, 50, 55, 60, 65]

# Build the interactive figure first, then label its axes.
hist_fig = px.histogram(
    dog_weights,
    histnorm='percent',  # use 'percent' for percent relative frequency histogram
    title="Percent Relative Frequency Histogram of Dog Weights",
    nbins=6,  # set the number of bins
)
hist_fig.update_layout(yaxis_title="Percent", xaxis_title="Dog Weights")

## Density Histogram with Plotly Express

In [None]:
# Raw data: the weights of the 25 dogs.
dog_weights = [
    37, 39, 41, 42, 43,
    43, 45, 45, 46, 46,
    47, 47, 47, 48, 48,
    50, 50, 51, 51, 52,
    53, 53, 54, 57, 62,
]

# Class edges, kept for reference; the px.histogram call below does not
# use this list and is configured through nbins instead.
bins = [35, 40, 45, 50, 55, 60, 65]

# Build the interactive figure first, then label its axes.
hist_fig = px.histogram(
    dog_weights,
    histnorm='probability density',  # density normalization
    title="Density Histogram of Dog Weights",
    nbins=6,  # set the number of bins
)
hist_fig.update_layout(yaxis_title="Density", xaxis_title="Dog Weights")

## Cumulative Frequency Histogram with Plotly Express

In [None]:
# Raw data: the weights of the 25 dogs.
dog_weights = [
    37, 39, 41, 42, 43,
    43, 45, 45, 46, 46,
    47, 47, 47, 48, 48,
    50, 50, 51, 51, 52,
    53, 53, 54, 57, 62,
]

# Class edges, kept for reference; the px.histogram call below does not
# use this list and is configured through nbins instead.
bins = [35, 40, 45, 50, 55, 60, 65]

# Build the interactive figure first, then label its axes.
hist_fig = px.histogram(
    dog_weights,
    cumulative=True,  # accumulate the counts from left to right
    title="Cumulative Frequency Histogram of Dog Weights",
    nbins=6,  # set the number of bins
)
hist_fig.update_layout(
    yaxis_title="Cumulative Frequency", xaxis_title="Dog Weights"
)

In [None]:
# Raw data: the weights of the 25 dogs.
dog_weights = [
    37, 39, 41, 42, 43,
    43, 45, 45, 46, 46,
    47, 47, 47, 48, 48,
    50, 50, 51, 51, 52,
    53, 53, 54, 57, 62,
]

# Class edges, kept for reference; the px.histogram call below does not
# use this list and is configured through nbins instead.
bins = [35, 40, 45, 50, 55, 60, 65]

# Build the interactive figure first, then label its axes.
hist_fig = px.histogram(
    dog_weights,
    cumulative=True,  # accumulate the values from left to right
    histnorm='probability',  # relative frequency instead of raw counts
    title="Cumulative Relative Frequency Histogram of Dog Weights",
    nbins=6,  # set the number of bins
)
hist_fig.update_layout(
    yaxis_title="Cumulative Relative Frequency", xaxis_title="Dog Weights"
)