## Setup

Import modules

In [21]:
import pandas as pd
import numpy as np

from scipy.stats import pearsonr

Define date constants

In [22]:
START_DATE = '2010-01'
END_DATE = '2022-12'

Import dataset

In [23]:
MEAT_PRODUCTION_DATASET = '../udataset/meat/production.csv'
meat_df = pd.read_csv(MEAT_PRODUCTION_DATASET)#
meat_df = meat_df.rename(columns={'DateTime': 'YearMonth'})
meat_df

Unnamed: 0,YearMonth,Beef,Veal,Pork,Lamb and Mutton,Broiler,Other Chicken,Turkey,Red Meat,Poultry
0,2001-01,2.172000e+09,17000000.0,1.672000e+09,18000000.0,2.622200e+09,42700000.0,403400000.0,3.879000e+09,3.068300e+09
1,2001-02,1.852000e+09,15000000.0,1.467000e+09,17000000.0,2.322200e+09,39700000.0,461200000.0,3.351000e+09,2.823100e+09
2,2001-03,2.065000e+09,16000000.0,1.606000e+09,23000000.0,2.588600e+09,44400000.0,409300000.0,3.710000e+09,3.042300e+09
3,2001-04,1.910000e+09,15000000.0,1.514000e+09,19000000.0,2.515700e+09,42200000.0,462000000.0,3.458000e+09,3.019900e+09
4,2001-05,2.265000e+09,15000000.0,1.535000e+09,16000000.0,2.835600e+09,45600000.0,428800000.0,3.831000e+09,3.310000e+09
...,...,...,...,...,...,...,...,...,...,...
271,2023-08,2.329500e+09,4100000.0,2.282400e+09,8900000.0,4.156700e+09,51100000.0,433500000.0,4.624900e+09,4.641300e+09
272,2023-09,2.114600e+09,3500000.0,2.175500e+09,8600000.0,3.805500e+09,48400000.0,489200000.0,4.302200e+09,4.343100e+09
273,2023-10,2.300800e+09,4000000.0,2.406200e+09,9800000.0,4.177300e+09,50600000.0,431200000.0,4.720800e+09,4.659100e+09
274,2023-11,2.247300e+09,4100000.0,2.377400e+09,10100000.0,3.817300e+09,44500000.0,500300000.0,4.638900e+09,4.362100e+09


In [24]:
UNEMPLOYMENT_DATASET = '../udataset/unemployment_by_state.csv'
unemployment_df = pd.read_csv(UNEMPLOYMENT_DATASET)
unemployment_df

Unnamed: 0,State,YearMonth,UnemploymentRate
0,alabama,2010-01,11.7
1,alabama,2010-02,11.4
2,alabama,2010-03,11.0
3,alabama,2010-04,10.1
4,alabama,2010-05,9.9
...,...,...,...
8835,puerto rico,2023-10,5.5
8836,puerto rico,2023-11,5.7
8837,puerto rico,2023-12,5.8
8838,puerto rico,2024-01,5.8


In [25]:
HOURLY_EARNINGS='../udataset/hourly_earnings.csv'
earnings_df = pd.read_csv(HOURLY_EARNINGS)
earnings_df = earnings_df.rename(columns = {'YearMonth': 'date', 'State': 'state'})
#poverty_df['date'] = poverty_df['date'].str.slice(0, 7)
earnings_df

Unnamed: 0,state,date,HourlyEarnings
0,alabama,2007-01,19.24
1,alabama,2007-02,19.29
2,alabama,2007-03,19.40
3,alabama,2007-04,19.53
4,alabama,2007-05,19.42
...,...,...,...
10501,wyoming,2023-10,30.35
10502,wyoming,2023-11,30.54
10503,wyoming,2023-12,30.84
10504,wyoming,2024-01,30.91


In [26]:
unemployment_df = pd.merge(unemployment_df, earnings_df,
                           left_on=['YearMonth', 'State'],
                           right_on=['date', 'state'])

unemployment_df = unemployment_df[['State', 'YearMonth', 'UnemploymentRate', 'HourlyEarnings']]
unemployment_df

Unnamed: 0,State,YearMonth,UnemploymentRate,HourlyEarnings
0,alabama,2010-01,11.7,19.94
1,alabama,2010-02,11.4,19.94
2,alabama,2010-03,11.0,19.95
3,alabama,2010-04,10.1,19.93
4,alabama,2010-05,9.9,19.90
...,...,...,...,...
8665,wyoming,2023-10,2.4,30.35
8666,wyoming,2023-11,2.7,30.54
8667,wyoming,2023-12,2.6,30.84
8668,wyoming,2024-01,3.7,30.91


In [27]:
# Pivot the table for each metric and merge them into a single dataframe with YearMonth as the index
unemployment_pivot = unemployment_df.pivot(
    index='YearMonth',
    columns='State',
    values='UnemploymentRate'
)
earning_pivot = unemployment_df.pivot(
    index='YearMonth',
    columns='State',
    values='HourlyEarnings'
)

In [28]:
# Rename the columns
unemployment_pivot.columns = [f"{col.lower()}-unemployment-rate" for col in unemployment_pivot.columns]
earning_pivot.columns = [f"{col.lower()}-hourly-earnings" for col in earning_pivot.columns]

In [29]:
merged_df = pd.merge(unemployment_pivot, earning_pivot, left_index=True, right_index=True, how='outer')
merged_df = merged_df.sort_index(axis=1)

In [30]:
meat_df = pd.merge(meat_df, merged_df, on='YearMonth', how='right')
meat_df

Unnamed: 0,YearMonth,Beef,Veal,Pork,Lamb and Mutton,Broiler,Other Chicken,Turkey,Red Meat,Poultry,...,virginia-hourly-earnings,virginia-unemployment-rate,washington-hourly-earnings,washington-unemployment-rate,west virginia-hourly-earnings,west virginia-unemployment-rate,wisconsin-hourly-earnings,wisconsin-unemployment-rate,wyoming-hourly-earnings,wyoming-unemployment-rate
0,2010-01,2.051100e+09,11300000.0,1.797800e+09,12200000.0,2.831200e+09,37800000.0,456300000.0,3.872400e+09,3.325300e+09,...,23.54,8.1,26.64,10.2,18.49,9.8,21.37,10.1,21.47,8.7
1,2010-02,1.927200e+09,10500000.0,1.745000e+09,11900000.0,2.739500e+09,34700000.0,424200000.0,3.694600e+09,3.198400e+09,...,23.97,8.1,26.81,10.1,18.70,10.4,21.40,10.4,21.39,8.3
2,2010-03,2.180700e+09,11900000.0,2.025100e+09,17000000.0,3.162000e+09,43400000.0,425400000.0,4.234700e+09,3.630800e+09,...,23.42,7.9,26.86,9.8,18.56,9.6,21.26,10.1,21.42,8.1
3,2010-04,2.110800e+09,10800000.0,1.835900e+09,12300000.0,3.038300e+09,40700000.0,490000000.0,3.969800e+09,3.569000e+09,...,23.24,7.2,26.81,8.9,18.59,8.5,21.38,8.8,21.66,7.4
4,2010-05,2.060700e+09,9800000.0,1.608700e+09,11800000.0,3.020000e+09,40200000.0,455100000.0,3.691000e+09,3.515300e+09,...,23.63,7.3,26.81,8.9,18.78,8.2,21.39,8.3,21.65,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165,2023-10,2.300800e+09,4000000.0,2.406200e+09,9800000.0,4.177300e+09,50600000.0,431200000.0,4.720800e+09,4.659100e+09,...,33.74,3.0,40.41,4.2,27.88,3.7,32.26,2.8,30.35,2.4
166,2023-11,2.247300e+09,4100000.0,2.377400e+09,10100000.0,3.817300e+09,44500000.0,500300000.0,4.638900e+09,4.362100e+09,...,33.12,2.9,40.19,4.3,27.97,3.7,32.29,2.7,30.54,2.7
167,2023-12,2.158900e+09,3700000.0,2.332400e+09,9500000.0,3.615000e+09,41500000.0,449200000.0,4.504500e+09,4.105700e+09,...,33.51,2.7,40.56,4.6,28.16,3.9,32.97,2.9,30.84,2.6
168,2024-01,,,,,,,,,,...,34.00,2.7,41.16,5.4,28.44,4.8,33.13,2.8,30.91,3.7


In [31]:
# #unemployment_rate = [9.63, 8.95, 8.07, 7.37, 6.17, 5.28, 4.87, 4.36, 3.90, 3.67, 8.05, 5.35, 3.61]
# for state in unemployment_df['state'].unique():
#     temp_df = unemployment_df[unemployment_df['state'] == state].copy()
#     #temp_df['unemployment-rate'] = temp_df['below_poverty_line_percent']/temp_df['unemployment-rate']
#     temp_df = temp_df.rename(columns={'unemployment-rate': f'{state}-unemployment-rate', 'below_poverty_line_percent': f"{state}-poverty-percent"})
#     temp_df = temp_df.drop(columns=['state'])

#     print(temp_df.columns)
#     meat_df = pd.merge(meat_df, temp_df, on=['year-month'])

#meat_df = meat_df[meat_df['year-month'].str.match(r'*-12')]
# #print(new_df['unemployment_rate'].expanding().corr(new_df['S&P_growth']))
for col in meat_df.columns[1:]:
    meat_df[col] = meat_df[col].pct_change()

meat_df = meat_df.dropna()

  meat_df[col] = meat_df[col].pct_change()


In [32]:
meat_df

Unnamed: 0,YearMonth,Beef,Veal,Pork,Lamb and Mutton,Broiler,Other Chicken,Turkey,Red Meat,Poultry,...,virginia-hourly-earnings,virginia-unemployment-rate,washington-hourly-earnings,washington-unemployment-rate,west virginia-hourly-earnings,west virginia-unemployment-rate,wisconsin-hourly-earnings,wisconsin-unemployment-rate,wyoming-hourly-earnings,wyoming-unemployment-rate
1,2010-02,-0.060407,-0.070796,-0.029369,-0.024590,-0.032389,-0.082011,-0.070348,-0.045915,-0.038162,...,0.018267,0.000000,0.006381,-0.009804,0.011357,0.061224,0.001404,0.029703,-0.003726,-0.045977
2,2010-03,0.131538,0.133333,0.160516,0.428571,0.154225,0.250720,0.002829,0.146186,0.135193,...,-0.022945,-0.024691,0.001865,-0.029703,-0.007487,-0.076923,-0.006542,-0.028846,0.001403,-0.024096
3,2010-04,-0.032054,-0.092437,-0.093427,-0.276471,-0.039121,-0.062212,0.151857,-0.062555,-0.017021,...,-0.007686,-0.088608,-0.001862,-0.091837,0.001616,-0.114583,0.005644,-0.128713,0.011204,-0.086420
4,2010-05,-0.023735,-0.092593,-0.123754,-0.040650,-0.006023,-0.012285,-0.071224,-0.070230,-0.015046,...,0.016781,0.013889,0.000000,0.000000,0.010221,-0.035294,0.000468,-0.056818,-0.000462,-0.054054
5,2010-06,0.111661,0.061224,0.130540,0.127119,0.039636,0.087065,-0.035816,0.119805,0.030410,...,-0.031316,0.000000,-0.002984,-0.011236,-0.001065,0.024390,-0.015428,0.024096,-0.026328,-0.057143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165,2023-10,0.088054,0.142857,0.106045,0.139535,0.097701,0.045455,-0.118561,0.097299,0.072759,...,0.019643,0.000000,0.009493,0.050000,-0.003574,0.000000,0.005298,-0.034483,0.010656,0.043478
166,2023-11,-0.023253,0.025000,-0.011969,0.030612,-0.086180,-0.120553,0.160250,-0.017349,-0.063746,...,-0.018376,-0.033333,-0.005444,0.023810,0.003228,0.000000,0.000930,-0.035714,0.006260,0.125000
167,2023-12,-0.039336,-0.097561,-0.018928,-0.059406,-0.052996,-0.067416,-0.102139,-0.028972,-0.058779,...,0.011775,-0.068966,0.009206,0.069767,0.006793,0.054054,0.021059,0.074074,0.009823,-0.037037
168,2024-01,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.014623,0.000000,0.014793,0.173913,0.009943,0.230769,0.004853,-0.034483,0.002270,0.423077


In [33]:
meat_df[meat_df.columns[1:]].corr().to_csv('../udataset/state_pearson_correlation.csv')
meat_df[meat_df.columns[1:]].corr(method = 'spearman').to_csv('../udataset/state_spearman_correlation.csv')
# meat_df['expanding_correlation'] = meat_df['delaware-poverty-percent'].expanding().corr(meat_df['Beef'])
# #meat_df['expanding_correlation'].to_csv('../udataset/state-correlation.csv')

# plt.figure(figsize=(10, 5))
# plt.plot(meat_df['year-month'], meat_df['expanding_correlation'], label='Cali')
# plt.xlabel('Year')
# plt.ylabel('Below Poverty Line (%)')
# plt.legend()
# plt.grid(True)
# plt.show()
#x = meat_df['expanding_correlation']
# plt.plot()

In [34]:
meat_df.to_csv('../udataset/meat_earnings_data.csv')

In [35]:
# Create an empty DataFrame to store the p-values
p_values = pd.DataFrame(index=meat_df.columns[1:], columns=meat_df.columns[1:])

In [36]:
for col1 in meat_df.columns[1:]:
    for col2 in meat_df.columns[1:]:
        if col1 == col2:
            p_values.at[col1, col2] = np.NaN  # No need to calculate p-value for itself
        else:
            _, p_value = pearsonr(meat_df[col1], meat_df[col2])
            p_values.at[col1, col2] = p_value

In [37]:
p_values.to_csv('../udataset/p_values_earnings_state.csv')