In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

### Importing the JOLTS data

In [None]:
# Read the three JOLTS CSV files (paths are relative to the notebook's working
# directory); the files are read in the order listed.
jolts_counts, jolts_percents, jolts_annual_sums = map(
    pd.read_csv,
    ('jolts_counts.csv', 'jolts_percents.csv', 'jolts_yearly_sums.csv'),
)

### Removing Not Seasonally Adjusted columns from the per-element column lists

In [None]:
# NOTE(review): `data_elements` is not defined anywhere in the visible part of
# this notebook. It has to exist before this cell runs, otherwise a fresh
# "Restart & Run All" raises NameError here — TODO define it in a config cell.
# The cells below look up the keys 'Job Openings' and 'Hires'.
def _seasonally_adjusted_columns(frame, element):
    """Return the column names of `frame` that mention `element`.

    A column is kept when its name contains the substring `element` and does
    not contain the substring 'Not Seasonally Adjusted'. The original column
    order is preserved.
    """
    return [
        name
        for name in frame.columns
        if element in name and 'Not Seasonally Adjusted' not in name
    ]


# Map each data element to its matching columns in the counts table.
count_columns_by_element = {
    element: _seasonally_adjusted_columns(jolts_counts, element)
    for element in data_elements
}

# Same mapping, built from the percents table.
percent_columns_by_element = {
    element: _seasonally_adjusted_columns(jolts_percents, element)
    for element in data_elements
}

# Job Openings vs Hirings

In [None]:
def _drop_total_columns(column_names):
    """Return the names from `column_names` that do not contain 'Total'."""
    return [name for name in column_names if 'Total' not in name]


# Columns whose name contains 'Total' are excluded — presumably these are the
# aggregate (all-industry) series; confirm against the actual column names.
industries_job_openings = _drop_total_columns(count_columns_by_element['Job Openings'])
industries_hires = _drop_total_columns(count_columns_by_element['Hires'])

In [None]:
# Cross-correlation heatmap: lagged job openings vs. current hires, for every
# (job-openings column, hires column) pair.

# Column subsets of the counts table for the two groups being compared.
job_openings_df = jolts_counts[industries_job_openings]
hiring_df = jolts_counts[industries_hires]

# Number of rows by which the predictor (Job Openings) is lagged.
# NOTE(review): reading this as "months" assumes jolts_counts holds one row per
# month in chronological order with no gaps — confirm against the CSV.
lag_months = 3

# shift(+k) moves every value k rows down, so row t of the lagged frame holds
# the job-openings value from row t-k and the first k rows become missing.
job_openings_lagged_df = job_openings_df.shift(lag_months)

# One correlation (Series.corr default method) per pair: the *shifted* job
# openings series against the *unshifted* hires series. Rows follow
# job_openings_df.columns, columns follow hiring_df.columns.
cross_corr_matrix_jo_hires = pd.DataFrame(
    [
        [
            job_openings_lagged_df[openings_name].corr(hiring_df[hires_name])
            for hires_name in hiring_df.columns
        ]
        for openings_name in job_openings_df.columns
    ],
    index=job_openings_df.columns,
    columns=hiring_df.columns,
    dtype=float,
)

# Order rows and columns from highest to lowest average correlation so the
# strongest relationships cluster in the top-left corner of the heatmap.
sorted_index_jo = (
    cross_corr_matrix_jo_hires.mean(axis='columns').sort_values(ascending=False).index
)
sorted_columns_jo = (
    cross_corr_matrix_jo_hires.mean(axis='index').sort_values(ascending=False).index
)
cross_corr_matrix_jo_hires = cross_corr_matrix_jo_hires.loc[sorted_index_jo, sorted_columns_jo]

# Draw the annotated heatmap on an explicit figure/axes pair.
sns.set(style="whitegrid")
fig, ax = plt.subplots(figsize=(16, 12))
sns.heatmap(
    cross_corr_matrix_jo_hires,
    annot=True,
    fmt=".2f",
    cmap='RdBu_r',  # diverging palette, centred on zero correlation below
    center=0,
    linewidths=0.5,
    linecolor='gray',
    cbar_kws={"shrink": 0.8},
    ax=ax,
)
ax.set_title(
    f'Job Openings (Lagged by {lag_months} Months) vs. Hirings Correlation Across Industries',
    fontsize=16,
)
# Slant the x tick labels and keep the y tick labels horizontal.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)
fig.tight_layout()
plt.show()
