In [1]:
# Import packages
import numpy as np
import pandas as pd
import pandas_datareader as pddr
import datetime as dt
import os
import matplotlib.pyplot as plt

from bokeh.io import output_file
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Title, Legend, HoverTool
from bokeh.models.tickers import SingleIntervalTicker
from usempl_streaks import get_usempl_data, usempl_streaks
from get_payems import get_payems_data

In [2]:
# Notebook settings.
# start_date / end_date: either a 'YYYY-mm-dd' date string or the sentinel
# 'min' / 'max', which the data cell maps to "1919-01-01" and "today".
start_date, end_date = 'min', 'max'

# download: True passes file_path=None to get_payems_data; False passes the
# path of a CSV file under the data directory instead.
download = True

# Plot switches: draw recession patches on the figure, and call show() on it.
recession_bars, html_show = True, True

# Used as the HTML page title of the saved plot file.
fig_title_str = (
    "Time Series of US Monthly Nonfarm Payroll Employment (PAYEMS)"
)

In [3]:
# Build the image directory, data directory, and recession data paths.
# Nothing is created on disk here; these are only path strings.
#
# The original author's checkout is preferred when it exists on this machine.
# Anywhere else the notebook falls back to the current working directory so
# that it is not tied to one user's home folder.
# NOTE(review): the fallback assumes the notebook is run from the folder that
# holds the "images" and "data" subdirectories -- confirm.
default_cur_path = (
    "/Users/richardevans/Docs/Economics/OSE/usempl_plots/usempl_plots"
)
if os.path.isdir(default_cur_path):
    cur_path = default_cur_path
else:
    cur_path = os.getcwd()
image_dir = os.path.join(cur_path, "images")
data_dir = os.path.join(cur_path, "data")
recession_data_path = os.path.join(data_dir, 'recession_data.csv')

In [4]:
# Get the employment data
#
# Resolve the requested start date. 'min' maps to a fixed early date string;
# any other value must parse as 'YYYY-mm-dd', otherwise ValueError is raised.
if start_date == 'min':
    beg_date_str = "1919-01-01"
else:
    try:
        beg_date_test = dt.datetime.strptime(start_date, "%Y-%m-%d")
    except (TypeError, ValueError) as exc:
        # Catch only parse failures (bad format or non-string input) so that
        # KeyboardInterrupt and similar are not converted into a ValueError.
        err_msg = (
            "Error get_payems.py: start_date input must be either a " +
            "date string in 'YYYY-mm-dd' format or 'min'."
        )
        raise ValueError(err_msg) from exc
    beg_date_str = start_date

# Resolve the requested end date. 'max' maps to the string "today" and the
# reporting date becomes the current date; otherwise the parsed end date is
# reused as the reporting date instead of being parsed a second time.
if end_date == 'max':
    end_date_str = "today"
    download_date = dt.datetime.today()
else:
    try:
        end_date_test = dt.datetime.strptime(end_date, "%Y-%m-%d")
    except (TypeError, ValueError) as exc:
        err_msg = (
            "Error get_payems.py: end_date input must be either a " +
            "date string in 'YYYY-mm-dd' format or 'max'."
        )
        raise ValueError(err_msg) from exc
    end_date_str = end_date
    download_date = end_date_test
download_date_str = download_date.strftime("%Y-%m-%d")

# Choose the data source. With download=True no file path is passed;
# otherwise a CSV path under data_dir is passed to get_payems_data.
if download:
    payems_file_path = None
    payems_source_str = "downloaded"
else:
    # NOTE(review): when end_date is 'max', end_date_str is "today", so this
    # resolves to "usempl_today.csv" -- confirm that file name is intended.
    payems_file_path = os.path.join(
        data_dir, "usempl_" + end_date_str + ".csv"
    )
    payems_source_str = "loaded from memory"

usempl_df, beg_date_str2, end_date_str2 = get_payems_data(
    beg_date_str=beg_date_str,
    end_date_str=end_date_str,
    file_path=payems_file_path
)
print(
    "PAYEMS data " + payems_source_str + " on " + download_date_str +
    " and has most recent PAYEMS data month of " + end_date_str2 + "."
)

Beginning date of U.S. employment series is 1919-07-01
End date of U.S. employment series is 2024-03-01
PAYEMS data downloaded on 2024-04-25 and has most recent PAYEMS data month of 2024-03-01.


In [5]:
# Split the employment series at the start of 1939: rows dated before
# 1939-01-01 go to usempl_imputed_df, rows on or after it to
# usempl_monthly_df
usempl_imputed_df = usempl_df.loc[lambda df: df['Date'] < '1939-01-01']
usempl_monthly_df = usempl_df.loc[lambda df: df['Date'] >= '1939-01-01']
# From the pre-1939 rows, keep only the observations whose month is 7
usempl_annual_df = usempl_imputed_df.loc[
    lambda df: df['Date'].dt.month == 7
]
# Wrap each subset, and the full frame, in a Bokeh ColumnDataSource
usempl_imputed_cds = ColumnDataSource(usempl_imputed_df)
usempl_annual_cds = ColumnDataSource(usempl_annual_df)
usempl_monthly_cds = ColumnDataSource(usempl_monthly_df)
usempl_cds = ColumnDataSource(usempl_df)

In [6]:
# Read the recession dates CSV, parsing the Peak and Trough columns as dates
recession_df = pd.read_csv(recession_data_path, parse_dates=['Peak', 'Trough'])
# Number of rows in the recession table
recession_data_length = recession_df['Peak'].shape[0]

In [8]:
# Create Bokeh plot of PAYEMS time series
fig_title = fig_title_str
filename = "tseries_payems_" + end_date_str2 + ".html"
output_file(os.path.join(image_dir, filename), title=fig_title)

# Format the tooltip
tooltips = [
    ("Date", "@Date{%F}"),
    ("Employment", "@PAYEMS{0,0.}"),
    ("Monthly change", "@diff_monthly{0,0.}"),
    ("Year-over-year change", "@diff_yoy{0,0.}")
]
# The strftime-style {%F} format is only honored when the field is registered
# with the "datetime" formatter, so an explicit HoverTool is built instead of
# relying on the bare "hover" tool name (which never received the tooltips).
hover_tool = HoverTool(tooltips=tooltips, formatters={"@Date": "datetime"})

min_date = usempl_df['Date'].min()
max_date = usempl_df['Date'].max()
min_y_val = usempl_df['PAYEMS'].min()
max_y_val = usempl_df['PAYEMS'].max()
range_y_vals = max_y_val - min_y_val
# Fraction of the data range added as padding above and below the series
fig_buffer_pct = 0.10
fig = figure(
    title=fig_title,
    # `height`/`width` replace `plot_height`/`plot_width`, which Bokeh 3
    # no longer accepts.
    height=500,
    width=800,
    # 'Date' is a datetime column, so the axis must be a datetime axis;
    # on the default linear axis the values are epoch milliseconds.
    x_axis_type="datetime",
    x_axis_label="Date (months)",
    y_axis_label="US nonfarm payroll employment",
    y_range=(
        min_y_val - fig_buffer_pct * range_y_vals,
        max_y_val + fig_buffer_pct * range_y_vals,
    ),
    tools=[
        "save",
        "zoom_in",
        "zoom_out",
        "box_zoom",
        "pan",
        "undo",
        "redo",
        "reset",
        hover_tool,
        "help",
    ],
    toolbar_location="left",
)
fig.toolbar.logo = None

# Set title font size and axes font sizes
fig.title.text_font_size = '18pt'
fig.xaxis.axis_label_text_font_size = '12pt'
fig.xaxis.major_label_text_font_size = '12pt'
fig.yaxis.axis_label_text_font_size = '12pt'
fig.yaxis.major_label_text_font_size = '12pt'

# Tick intervals. The X-axis keeps Bokeh's default datetime ticker: a
# SingleIntervalTicker with interval=10 on a datetime axis would place a tick
# every 10 milliseconds.
# NOTE(review): the Y-axis interval of 5 is in the units of the PAYEMS
# column; confirm those units make 5 a sensible spacing for this range.
fig.yaxis.ticker = SingleIntervalTicker(interval=5, num_minor_ticks=5)
fig.ygrid.ticker = SingleIntervalTicker(interval=5)

# Create line plot of the PAYEMS data using the usempl_cds ColumnDataSource
payems_line = fig.line(
    x='Date',
    y='PAYEMS',
    source=usempl_cds,
    line_width=2,
    line_color='blue',
    legend_label="PAYEMS",
)
# Limit hover to the PAYEMS line so glyphs added to the figure later, whose
# data lack these columns, do not produce empty tooltips.
hover_tool.renderers = [payems_line]

if html_show:
    show(fig)

In [None]:
if recession_bars:
    # Create recession bars: one shaded patch per recession whose peak and
    # trough both fall on or after the first date in the employment series.
    # The patches span the figure's fixed y_range so they reach the top and
    # bottom of the plot regardless of the magnitude of the PAYEMS values.
    bar_bottom = min_y_val - fig_buffer_pct * range_y_vals
    bar_top = max_y_val + fig_buffer_pct * range_y_vals
    for peak_date, trough_date in zip(
        recession_df['Peak'], recession_df['Trough']
    ):
        if not (peak_date >= min_date and trough_date >= min_date):
            continue
        if peak_date == trough_date:
            # A recession whose peak and trough coincide would be a
            # zero-width patch, so widen it. Integer arithmetic on a
            # Timestamp raises TypeError; a calendar offset is used instead.
            # NOTE(review): one month is assumed to be the intended width of
            # the original "+ 1" -- confirm.
            bar_end = trough_date + pd.DateOffset(months=1)
        else:
            bar_end = trough_date
        fig.patch(
            x=[peak_date, bar_end, bar_end, peak_date],
            y=[bar_bottom, bar_bottom, bar_top, bar_top],
            fill_color='gray',
            fill_alpha=0.4,
            line_width=0,
            legend_label='Recession'
        )

    # Patches added after an earlier show() are not part of the HTML that was
    # already written, so render the figure again to make the bars visible.
    if html_show:
        show(fig)