## Setup

Import modules

In [172]:
import pandas as pd
import numpy as np

from scipy.stats import pearsonr

Define date constants

In [173]:
START_DATE = '2010-01'
END_DATE = '2022-12'

Import dataset

In [174]:
MEAT_PRODUCTION_DATASET = '../udataset/meat/production.csv'
meat_df = pd.read_csv(MEAT_PRODUCTION_DATASET)#
meat_df = meat_df.rename(columns={'DateTime': 'YearMonth'})
meat_df

Unnamed: 0,YearMonth,Beef,Veal,Pork,Lamb and Mutton,Broiler,Other Chicken,Turkey,Red Meat,Poultry
0,2001-01,2.172000e+09,17000000.0,1.672000e+09,18000000.0,2.622200e+09,42700000.0,403400000.0,3.879000e+09,3.068300e+09
1,2001-02,1.852000e+09,15000000.0,1.467000e+09,17000000.0,2.322200e+09,39700000.0,461200000.0,3.351000e+09,2.823100e+09
2,2001-03,2.065000e+09,16000000.0,1.606000e+09,23000000.0,2.588600e+09,44400000.0,409300000.0,3.710000e+09,3.042300e+09
3,2001-04,1.910000e+09,15000000.0,1.514000e+09,19000000.0,2.515700e+09,42200000.0,462000000.0,3.458000e+09,3.019900e+09
4,2001-05,2.265000e+09,15000000.0,1.535000e+09,16000000.0,2.835600e+09,45600000.0,428800000.0,3.831000e+09,3.310000e+09
...,...,...,...,...,...,...,...,...,...,...
271,2023-08,2.329500e+09,4100000.0,2.282400e+09,8900000.0,4.156700e+09,51100000.0,433500000.0,4.624900e+09,4.641300e+09
272,2023-09,2.114600e+09,3500000.0,2.175500e+09,8600000.0,3.805500e+09,48400000.0,489200000.0,4.302200e+09,4.343100e+09
273,2023-10,2.300800e+09,4000000.0,2.406200e+09,9800000.0,4.177300e+09,50600000.0,431200000.0,4.720800e+09,4.659100e+09
274,2023-11,2.247300e+09,4100000.0,2.377400e+09,10100000.0,3.817300e+09,44500000.0,500300000.0,4.638900e+09,4.362100e+09


In [175]:
UNEMPLOYMENT_DATASET = '../udataset/unemployment_by_state.csv'
unemployment_df = pd.read_csv(UNEMPLOYMENT_DATASET)
unemployment_df

Unnamed: 0,State,YearMonth,UnemploymentRate
0,alabama,2010-01,11.7
1,alabama,2010-02,11.4
2,alabama,2010-03,11.0
3,alabama,2010-04,10.1
4,alabama,2010-05,9.9
...,...,...,...
8835,puerto rico,2023-10,5.5
8836,puerto rico,2023-11,5.7
8837,puerto rico,2023-12,5.8
8838,puerto rico,2024-01,5.8


In [176]:
POVERTYDATA ='../udataset/wealth_data_interpolated.csv'
poverty_df = pd.read_csv(POVERTYDATA)

poverty_df['date'] = poverty_df['date'].str.slice(0, 7)
poverty_df

Unnamed: 0,date,below_poverty_line_percent,state
0,2010-01,17.100000,alabama
1,2010-02,17.142466,alabama
2,2010-03,17.180822,alabama
3,2010-04,17.223288,alabama
4,2010-05,17.264384,alabama
...,...,...,...
7535,2021-09,10.700000,wyoming
7536,2021-10,10.700000,wyoming
7537,2021-11,10.700000,wyoming
7538,2021-12,10.700000,wyoming


In [177]:
unemployment_df = pd.merge(unemployment_df, poverty_df,
                           left_on=['YearMonth', 'State'],
                           right_on=['date', 'state'])

unemployment_df = unemployment_df[['State', 'YearMonth', 'UnemploymentRate', 'below_poverty_line_percent']]
unemployment_df

Unnamed: 0,State,YearMonth,UnemploymentRate,below_poverty_line_percent
0,alabama,2010-01,11.7,17.100000
1,alabama,2010-02,11.4,17.142466
2,alabama,2010-03,11.0,17.180822
3,alabama,2010-04,10.1,17.223288
4,alabama,2010-05,9.9,17.264384
...,...,...,...,...
7535,puerto rico,2021-09,7.9,42.346325
7536,puerto rico,2021-10,7.1,42.308155
7537,puerto rico,2021-11,7.3,42.270173
7538,puerto rico,2021-12,6.7,42.234886


In [178]:
# Pivot the table for each metric and merge them into a single dataframe with YearMonth as the index
unemployment_pivot = unemployment_df.pivot(
    index='YearMonth',
    columns='State',
    values='UnemploymentRate'
)
poverty_pivot = unemployment_df.pivot(
    index='YearMonth',
    columns='State',
    values='below_poverty_line_percent'
)

In [179]:
# Rename the columns
unemployment_pivot.columns = [f"{col.lower()}-unemployment-rate" for col in unemployment_pivot.columns]
poverty_pivot.columns = [f"{col.lower()}-poverty-rate" for col in poverty_pivot.columns]

In [180]:
merged_df = pd.merge(unemployment_pivot, poverty_pivot, left_index=True, right_index=True, how='outer')
merged_df = merged_df.sort_index(axis=1)

In [181]:
meat_df = pd.merge(meat_df, merged_df, on='YearMonth', how='right')
meat_df

Unnamed: 0,YearMonth,Beef,Veal,Pork,Lamb and Mutton,Broiler,Other Chicken,Turkey,Red Meat,Poultry,...,virginia-poverty-rate,virginia-unemployment-rate,washington-poverty-rate,washington-unemployment-rate,west virginia-poverty-rate,west virginia-unemployment-rate,wisconsin-poverty-rate,wisconsin-unemployment-rate,wyoming-poverty-rate,wyoming-unemployment-rate
0,2010-01,2.051100e+09,11300000.0,1.797800e+09,12200000.0,2.831200e+09,37800000.0,456300000.0,3.872400e+09,3.325300e+09,...,10.300000,8.1,12.100000,10.2,17.400000,9.8,11.600000,10.1,9.800000,8.7
1,2010-02,1.927200e+09,10500000.0,1.745000e+09,11900000.0,2.739500e+09,34700000.0,424200000.0,3.694600e+09,3.198400e+09,...,10.333973,8.1,12.133973,10.1,17.408493,10.4,11.630123,10.4,9.803154,8.3
2,2010-03,2.180700e+09,11900000.0,2.025100e+09,17000000.0,3.162000e+09,43400000.0,425400000.0,4.234700e+09,3.630800e+09,...,10.364658,7.9,12.164658,9.8,17.416164,9.6,11.658003,10.1,9.811124,8.1
3,2010-04,2.110800e+09,10800000.0,1.835900e+09,12300000.0,3.038300e+09,40700000.0,490000000.0,3.969800e+09,3.569000e+09,...,10.398630,7.2,12.198630,8.9,17.424658,8.5,11.689596,8.8,9.825111,7.4
4,2010-05,2.060700e+09,9800000.0,1.608700e+09,11800000.0,3.020000e+09,40200000.0,455100000.0,3.691000e+09,3.515300e+09,...,10.431507,7.3,12.231507,8.9,17.432877,8.2,11.720876,8.3,9.843309,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140,2021-09,2.259700e+09,4300000.0,2.253600e+09,9200000.0,3.928500e+09,45800000.0,474200000.0,4.526800e+09,4.448500e+09,...,9.944363,3.4,9.923599,4.6,16.823599,3.9,10.700000,2.9,10.700000,3.3
141,2021-10,2.324900e+09,4400000.0,2.360000e+09,10100000.0,3.827800e+09,44300000.0,470500000.0,4.699400e+09,4.342600e+09,...,9.955981,3.1,9.916626,4.1,16.816626,3.6,10.700000,2.5,10.700000,3.3
142,2021-11,2.342800e+09,4500000.0,2.397900e+09,10900000.0,3.665300e+09,40600000.0,485500000.0,4.756100e+09,4.191400e+09,...,9.969400,2.1,9.910183,3.9,16.810183,3.6,10.700000,2.4,10.700000,3.5
143,2021-12,2.326800e+09,5100000.0,2.393800e+09,10500000.0,3.700100e+09,44700000.0,453300000.0,4.736200e+09,4.198100e+09,...,9.983754,2.7,9.904742,3.8,16.804742,3.5,10.700000,2.5,10.700000,3.5


In [182]:
# #unemployment_rate = [9.63, 8.95, 8.07, 7.37, 6.17, 5.28, 4.87, 4.36, 3.90, 3.67, 8.05, 5.35, 3.61]
# for state in unemployment_df['state'].unique():
#     temp_df = unemployment_df[unemployment_df['state'] == state].copy()
#     #temp_df['unemployment-rate'] = temp_df['below_poverty_line_percent']/temp_df['unemployment-rate']
#     temp_df = temp_df.rename(columns={'unemployment-rate': f'{state}-unemployment-rate', 'below_poverty_line_percent': f"{state}-poverty-percent"})
#     temp_df = temp_df.drop(columns=['state'])

#     print(temp_df.columns)
#     meat_df = pd.merge(meat_df, temp_df, on=['year-month'])

meat_df = meat_df[meat_df['year-month'].str.match(r'*-12')]
# #print(new_df['unemployment_rate'].expanding().corr(new_df['S&P_growth']))
for col in meat_df.columns[1:]:
    meat_df[col] = meat_df[col].pct_change()

meat_df = meat_df.dropna()

KeyError: 'year-month'

In [None]:
meat_df

Unnamed: 0,YearMonth,Beef,Veal,Pork,Lamb and Mutton,Broiler,Other Chicken,Turkey,Red Meat,Poultry,...,virginia-poverty-rate,virginia-unemployment-rate,washington-poverty-rate,washington-unemployment-rate,west virginia-poverty-rate,west virginia-unemployment-rate,wisconsin-poverty-rate,wisconsin-unemployment-rate,wyoming-poverty-rate,wyoming-unemployment-rate
1,2010-02,-0.060407,-0.070796,-0.029369,-0.024590,-0.032389,-0.082011,-0.070348,-0.045915,-0.038162,...,0.003298,0.000000,0.002808,-0.009804,0.000488,0.061224,0.002597,0.029703,0.000322,-0.045977
2,2010-03,0.131538,0.133333,0.160516,0.428571,0.154225,0.250720,0.002829,0.146186,0.135193,...,0.002969,-0.024691,0.002529,-0.029703,0.000441,-0.076923,0.002397,-0.028846,0.000813,-0.024096
3,2010-04,-0.032054,-0.092437,-0.093427,-0.276471,-0.039121,-0.062212,0.151857,-0.062555,-0.017021,...,0.003278,-0.088608,0.002793,-0.091837,0.000488,-0.114583,0.002710,-0.128713,0.001426,-0.086420
4,2010-05,-0.023735,-0.092593,-0.123754,-0.040650,-0.006023,-0.012285,-0.071224,-0.070230,-0.015046,...,0.003162,0.013889,0.002695,0.000000,0.000472,-0.035294,0.002676,-0.056818,0.001852,-0.054054
5,2010-06,0.111661,0.061224,0.130540,0.127119,0.039636,0.087065,-0.035816,0.119805,0.030410,...,0.003257,0.000000,0.002777,-0.011236,0.000487,0.024390,0.002818,0.024096,0.002345,-0.057143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140,2021-09,-0.027668,0.023810,0.023805,0.045455,0.006791,-0.025532,0.034017,-0.002512,0.009279,...,0.001066,-0.150000,-0.000796,-0.098039,-0.000470,-0.204082,0.000000,-0.216216,0.000000,-0.153846
141,2021-10,0.028853,0.023256,0.047213,0.097826,-0.025633,-0.032751,-0.007803,0.038128,-0.023806,...,0.001168,-0.088235,-0.000703,-0.108696,-0.000414,-0.076923,0.000000,-0.137931,0.000000,0.000000
142,2021-11,0.007699,0.022727,0.016059,0.079208,-0.042453,-0.083521,0.031881,0.012065,-0.034818,...,0.001348,-0.322581,-0.000650,-0.048780,-0.000383,0.000000,0.000000,-0.040000,0.000000,0.060606
143,2021-12,-0.006829,0.133333,-0.001710,-0.036697,0.009494,0.100985,-0.066323,-0.004184,0.001599,...,0.001440,0.285714,-0.000549,-0.025641,-0.000324,-0.027778,0.000000,0.041667,0.000000,0.000000


In [None]:
meat_df[meat_df.columns[1:]].corr().to_csv('../udataset/state_pearson_correlation.csv')
meat_df[meat_df.columns[1:]].corr(method = 'spearman').to_csv('../udataset/state_spearman_correlation.csv')
# meat_df['expanding_correlation'] = meat_df['delaware-poverty-percent'].expanding().corr(meat_df['Beef'])
# #meat_df['expanding_correlation'].to_csv('../udataset/state-correlation.csv')

# plt.figure(figsize=(10, 5))
# plt.plot(meat_df['year-month'], meat_df['expanding_correlation'], label='Cali')
# plt.xlabel('Year')
# plt.ylabel('Below Poverty Line (%)')
# plt.legend()
# plt.grid(True)
# plt.show()
#x = meat_df['expanding_correlation']
# plt.plot()

In [None]:
meat_df.to_csv('../udataset/meat_poverty_data.csv')

In [None]:
# Create an empty DataFrame to store the p-values
p_values = pd.DataFrame(index=meat_df.columns[1:], columns=meat_df.columns[1:])

In [None]:
for col1 in meat_df.columns[1:]:
    for col2 in meat_df.columns[1:]:
        if col1 == col2:
            p_values.at[col1, col2] = np.NaN  # No need to calculate p-value for itself
        else:
            _, p_value = pearsonr(meat_df[col1], meat_df[col2])
            p_values.at[col1, col2] = p_value

In [None]:
p_values.to_csv('../udataset/p_values_state_poverty.csv')