# Exercises 7: Solutions

In [1]:
import math

import pandas as pd

import matplotlib
import matplotlib.pyplot as plt

In [None]:
# Read the sales transactions; `transaction_date` is parsed into datetime values.
transactions = pd.read_csv(
    "../../0_data/sales/transactions.csv", parse_dates=["transaction_date"]
)

# Preview the first three rows.
transactions.head(3)

In [3]:
# Apply the "seaborn-v0_8" style sheet to every plot created from here on.
plt.style.use("seaborn-v0_8")

## Plot histograms

In [None]:
# Keep only the columns with a numeric dtype, then list their names.
numeric = transactions.select_dtypes(include="number")
numeric.columns

In [None]:
# Create a new Figure that is one row of 3x3 inch subplots, one per numerical
# column. Deriving the layout from the data (instead of hard-coding 4 columns)
# keeps this cell working when the number of numerical columns changes.
n_numeric = len(numeric.columns)
fig = plt.figure(figsize=(3 * max(n_numeric, 1), 3))

# Create histogram plots for numerical columns, side by side.
for idx, column in enumerate(numeric.columns, start=1):
    ax = fig.add_subplot(1, n_numeric, idx)
    ax.hist(numeric[column], edgecolor="white")
    ax.set_title(column)

### GridSpec

In [None]:
# Create a Figure and a GridSpec of 3 rows and 4 columns.
fig = plt.figure(figsize=(6, 6))
grid = plt.GridSpec(nrows=3, ncols=4)

In [None]:
# Draw the large histogram on the grid cells spanning rows 0-2 and columns 0-2.
ax_big = fig.add_subplot(grid[:, :3])
ax_big.hist(numeric["line_nr"], edgecolor="white")
ax_big.set_title("line_nr")

In [8]:
# Stack one small histogram per column in the last grid column, one per row.
for row, name in enumerate(["quantity", "price", "total"]):
    ax_small = fig.add_subplot(grid[row, -1])
    ax_small.hist(numeric[name], edgecolor="white")
    ax_small.set(title=name)


In [None]:
# Adjust the subplot spacing with tight_layout, then end the cell with the
# Figure object so the notebook renders it as the cell output.
fig.tight_layout()
fig

## Plot DataFrame

In [12]:
def plot_dataframe(df, plot_cols=4, plot_size=3, topn=10):
    """Plot an overview of every numerical and categorical column of a DataFrame.

    Numerical columns (``select_dtypes("number")``) are drawn as histograms,
    followed by the categorical columns (``object`` and ``category`` dtypes),
    which are drawn as bar charts of their ``topn`` most frequent values.
    Columns of any other dtype are not plotted.

    Parameters
    ----------
    df : pandas.DataFrame
        The data to visualise.
    plot_cols : int, default 4
        Number of subplots per row.
    plot_size : float, default 3
        Width and height of each subplot, in inches.
    topn : int, default 10
        Maximum number of distinct values shown per categorical column.

    Returns
    -------
    None
        The figure is created through pyplot; nothing is returned. When ``df``
        has no numerical or categorical columns, no figure is created at all.
    """
    numerical = df.select_dtypes("number")
    categorical = df.select_dtypes(["object", "category"])

    # Nothing to draw: bail out instead of creating a zero-height figure.
    plot_count = numerical.shape[1] + categorical.shape[1]
    if plot_count == 0:
        return

    # Compute number of subplot rows
    plot_rows = math.ceil(plot_count / plot_cols)

    # Create the figure; its dimensions scale with the subplot grid.
    fig = plt.figure(figsize=(plot_cols * plot_size, plot_rows * plot_size))

    # Create histogram for numerical columns. `.items()` yields one Series per
    # column, which also works when column labels are duplicated.
    for index, (column, values) in enumerate(numerical.items(), start=1):
        ax = fig.add_subplot(plot_rows, plot_cols, index)
        # Build the histogram from the observed (non-missing) values only.
        ax.hist(values.dropna(), edgecolor="white")
        ax.set_title(column)

    # Create top N values for categorical columns; their subplots continue
    # right after the last numerical one.
    first_categorical = numerical.shape[1] + 1
    for index, (column, values) in enumerate(
        categorical.items(), start=first_categorical
    ):
        counts = values.value_counts().head(topn)

        ax = fig.add_subplot(plot_rows, plot_cols, index)
        # Use string labels so every value is drawn as a discrete category,
        # even when an object column holds non-string or mixed-type values.
        ax.bar(counts.index.astype(str), counts.to_numpy())
        # Rotate only the category labels; the count axis stays horizontal.
        ax.tick_params(axis="x", rotation=90)
        ax.set_title(column)

    fig.tight_layout()

In [None]:
# Plot every numerical and categorical column of the transactions data.
plot_dataframe(transactions)