## Setup

Import modules

In [296]:
import pandas as pd
import numpy as np

from scipy.stats import pearsonr

Define date constants

In [297]:
# Date bounds written as 'YYYY-MM' strings.
# NOTE(review): no cell visible in this notebook reads START_DATE or END_DATE —
# confirm whether an explicit date filter was intended somewhere downstream.
START_DATE = '2010-01'
END_DATE = '2022-12'

Import datasets

In [298]:
MEAT_PRODUCTION_DATASET = '../udataset/meat/production.csv'

# Read the meat production table and rename its 'DateTime' column to
# 'YearMonth', the key name used by the other tables in this notebook.
meat_df = (
    pd.read_csv(MEAT_PRODUCTION_DATASET)
    .rename(columns={'DateTime': 'YearMonth'})
)
meat_df

Unnamed: 0,YearMonth,Beef,Veal,Pork,Lamb and Mutton,Broiler,Other Chicken,Turkey,Red Meat,Poultry
0,2001-01,2.172000e+09,17000000.0,1.672000e+09,18000000.0,2.622200e+09,42700000.0,403400000.0,3.879000e+09,3.068300e+09
1,2001-02,1.852000e+09,15000000.0,1.467000e+09,17000000.0,2.322200e+09,39700000.0,461200000.0,3.351000e+09,2.823100e+09
2,2001-03,2.065000e+09,16000000.0,1.606000e+09,23000000.0,2.588600e+09,44400000.0,409300000.0,3.710000e+09,3.042300e+09
3,2001-04,1.910000e+09,15000000.0,1.514000e+09,19000000.0,2.515700e+09,42200000.0,462000000.0,3.458000e+09,3.019900e+09
4,2001-05,2.265000e+09,15000000.0,1.535000e+09,16000000.0,2.835600e+09,45600000.0,428800000.0,3.831000e+09,3.310000e+09
...,...,...,...,...,...,...,...,...,...,...
271,2023-08,2.329500e+09,4100000.0,2.282400e+09,8900000.0,4.156700e+09,51100000.0,433500000.0,4.624900e+09,4.641300e+09
272,2023-09,2.114600e+09,3500000.0,2.175500e+09,8600000.0,3.805500e+09,48400000.0,489200000.0,4.302200e+09,4.343100e+09
273,2023-10,2.300800e+09,4000000.0,2.406200e+09,9800000.0,4.177300e+09,50600000.0,431200000.0,4.720800e+09,4.659100e+09
274,2023-11,2.247300e+09,4100000.0,2.377400e+09,10100000.0,3.817300e+09,44500000.0,500300000.0,4.638900e+09,4.362100e+09


In [299]:
# Per-state unemployment table; later cells rely on its 'State', 'YearMonth'
# and 'UnemploymentRate' columns.
UNEMPLOYMENT_DATASET = '../udataset/unemployment_by_state.csv'
unemployment_df = pd.read_csv(UNEMPLOYMENT_DATASET)
unemployment_df

Unnamed: 0,State,YearMonth,UnemploymentRate
0,alabama,2010-01,11.7
1,alabama,2010-02,11.4
2,alabama,2010-03,11.0
3,alabama,2010-04,10.1
4,alabama,2010-05,9.9
...,...,...,...
8835,puerto rico,2023-10,5.5
8836,puerto rico,2023-11,5.7
8837,puerto rico,2023-12,5.8
8838,puerto rico,2024-01,5.8


In [300]:
POVERTYDATA = '../udataset/wealth_data_interpolated.csv'

# Load the poverty table and truncate each 'date' string to its first seven
# characters so it can be matched against the 'YearMonth' keys of the other tables.
poverty_df = pd.read_csv(POVERTYDATA).assign(
    date=lambda frame: frame['date'].str[:7]
)
poverty_df

Unnamed: 0,date,below_poverty_line_percent,state
0,2010-01,17.141096,alabama
1,2010-02,17.179452,alabama
2,2010-03,17.221918,alabama
3,2010-04,17.263014,alabama
4,2010-05,17.305479,alabama
...,...,...,...
7483,2021-08,10.700000,wyoming
7484,2021-09,10.700000,wyoming
7485,2021-10,10.700000,wyoming
7486,2021-11,10.700000,wyoming


In [301]:
# Attach the poverty percentage to every (state, month) unemployment record.
#
# Both inputs are narrowed to the columns that are actually needed *before*
# the merge. Because this cell overwrites `unemployment_df`, merging the full
# frames would, on a second run, collide on 'below_poverty_line_percent'
# (producing '_x'/'_y' suffixed columns) and the column selection would raise
# KeyError. Narrowing first keeps the cell idempotent while giving the same
# result on the first run.
unemployment_df = pd.merge(
    unemployment_df[['State', 'YearMonth', 'UnemploymentRate']],
    poverty_df[['date', 'state', 'below_poverty_line_percent']],
    left_on=['YearMonth', 'State'],
    right_on=['date', 'state'],
    how='inner',  # pandas' default, spelled out: keep only pairs present in both tables
)

# Drop the duplicated right-hand key columns ('date', 'state').
unemployment_df = unemployment_df[['State', 'YearMonth', 'UnemploymentRate', 'below_poverty_line_percent']]
unemployment_df

Unnamed: 0,State,YearMonth,UnemploymentRate,below_poverty_line_percent
0,alabama,2010-01,11.7,17.141096
1,alabama,2010-02,11.4,17.179452
2,alabama,2010-03,11.0,17.221918
3,alabama,2010-04,10.1,17.263014
4,alabama,2010-05,9.9,17.305479
...,...,...,...,...
7483,puerto rico,2021-08,9.2,42.347620
7484,puerto rico,2021-09,7.9,42.309405
7485,puerto rico,2021-10,7.1,42.271375
7486,puerto rico,2021-11,7.3,42.236039


In [302]:
# Reshape the long (State, YearMonth) table into two wide tables, one per
# metric: rows are indexed by 'YearMonth' and there is one column per state.
unemployment_pivot, poverty_pivot = (
    unemployment_df.pivot(index='YearMonth', columns='State', values=metric)
    for metric in ('UnemploymentRate', 'below_poverty_line_percent')
)

In [303]:
# Tag every state column with the metric it carries, so the two wide tables
# can be combined without column-name clashes.
unemployment_pivot.columns = [
    f"{state.lower()}-unemployment-rate" for state in unemployment_pivot.columns
]
poverty_pivot.columns = [
    f"{state.lower()}-poverty-rate" for state in poverty_pivot.columns
]

In [304]:
# Combine both wide tables on their shared row index (outer join keeps every
# index value from either side), then order the columns alphabetically.
merged_df = (
    unemployment_pivot
    .merge(poverty_pivot, how='outer', left_index=True, right_index=True)
    .sort_index(axis=1)
)

In [305]:
# Join the per-state unemployment/poverty columns onto the meat production
# table by 'YearMonth'. how='right' keeps exactly the months present in
# `merged_df`.
#
# This cell overwrites `meat_df`, so any `merged_df` columns already present
# from a previous run are dropped first. Without that, re-running the cell
# would duplicate every state column with '_x'/'_y' suffixes. On a first run
# nothing is dropped and the result is unchanged.
meat_df = pd.merge(
    meat_df.drop(columns=merged_df.columns, errors='ignore'),
    merged_df,
    on='YearMonth',
    how='right',
)
meat_df

Unnamed: 0,YearMonth,Beef,Veal,Pork,Lamb and Mutton,Broiler,Other Chicken,Turkey,Red Meat,Poultry,...,virginia-poverty-rate,virginia-unemployment-rate,washington-poverty-rate,washington-unemployment-rate,west virginia-poverty-rate,west virginia-unemployment-rate,wisconsin-poverty-rate,wisconsin-unemployment-rate,wyoming-poverty-rate,wyoming-unemployment-rate
0,2010-01,2.051100e+09,11300000.0,1.797800e+09,12200000.0,2.831200e+09,37800000.0,456300000.0,3.872400e+09,3.325300e+09,...,10.332877,8.1,12.132877,10.2,17.408219,9.8,11.629139,10.1,9.802957,8.7
1,2010-02,1.927200e+09,10500000.0,1.745000e+09,11900000.0,2.739500e+09,34700000.0,424200000.0,3.694600e+09,3.198400e+09,...,10.363562,8.1,12.163562,10.1,17.415890,10.4,11.656997,10.4,9.810761,8.3
2,2010-03,2.180700e+09,11900000.0,2.025100e+09,17000000.0,3.162000e+09,43400000.0,425400000.0,4.234700e+09,3.630800e+09,...,10.397534,7.9,12.197534,9.8,17.424384,9.6,11.688565,10.1,9.824580,8.1
3,2010-04,2.110800e+09,10800000.0,1.835900e+09,12300000.0,3.038300e+09,40700000.0,490000000.0,3.969800e+09,3.569000e+09,...,10.430411,7.2,12.230411,8.9,17.432603,8.5,11.719822,8.8,9.842634,7.4
4,2010-05,2.060700e+09,9800000.0,1.608700e+09,11800000.0,3.020000e+09,40200000.0,455100000.0,3.691000e+09,3.515300e+09,...,10.464384,7.3,12.264384,8.9,17.441096,8.2,11.752833,8.3,9.865588,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,2021-08,2.324000e+09,4200000.0,2.201200e+09,8800000.0,3.902000e+09,47000000.0,458600000.0,4.538200e+09,4.407600e+09,...,9.943999,4.0,9.923843,5.1,16.823843,4.9,10.700000,3.7,10.700000,3.9
140,2021-09,2.259700e+09,4300000.0,2.253600e+09,9200000.0,3.928500e+09,45800000.0,474200000.0,4.526800e+09,4.448500e+09,...,9.955572,3.4,9.916847,4.6,16.816847,3.9,10.700000,2.9,10.700000,3.3
141,2021-10,2.324900e+09,4400000.0,2.360000e+09,10100000.0,3.827800e+09,44300000.0,470500000.0,4.699400e+09,4.342600e+09,...,9.968945,3.1,9.910378,4.1,16.810378,3.6,10.700000,2.5,10.700000,3.3
142,2021-11,2.342800e+09,4500000.0,2.397900e+09,10900000.0,3.665300e+09,40600000.0,485500000.0,4.756100e+09,4.191400e+09,...,9.983254,2.1,9.904911,3.9,16.804911,3.6,10.700000,2.4,10.700000,3.5


In [306]:
# NOTE(review): disabled draft kept for reference only. It builds per-state
# unemployment/poverty columns in a loop and merges them into meat_df, and it
# refers to column names ('state', 'unemployment-rate', 'year-month') that the
# frames in this notebook do not use ('State', 'UnemploymentRate', 'YearMonth').
# It will not run as written if uncommented.
# #unemployment_rate = [9.63, 8.95, 8.07, 7.37, 6.17, 5.28, 4.87, 4.36, 3.90, 3.67, 8.05, 5.35, 3.61]
# for state in unemployment_df['state'].unique():
#     temp_df = unemployment_df[unemployment_df['state'] == state].copy()
#     #temp_df['unemployment-rate'] = temp_df['below_poverty_line_percent']/temp_df['unemployment-rate']
#     temp_df = temp_df.rename(columns={'unemployment-rate': f'{state}-unemployment-rate', 'below_poverty_line_percent': f"{state}-poverty-percent"})
#     temp_df = temp_df.drop(columns=['state'])

#     print(temp_df.columns)
#     meat_df = pd.merge(meat_df, temp_df, on=['year-month'])

# #meat_df = meat_df[meat_df['year-month'].str.match(r'*-12')]
# #print(new_df['unemployment_rate'].expanding().corr(new_df['S&P_growth']))

In [307]:
# Pairwise correlation matrix of every column after the first one, written
# straight to disk.
meat_df.iloc[:, 1:].corr().to_csv('../udataset/state_correlation.csv')
# meat_df['expanding_correlation'] = meat_df['delaware-poverty-percent'].expanding().corr(meat_df['Beef'])
# #meat_df['expanding_correlation'].to_csv('../udataset/state-correlation.csv')

# plt.figure(figsize=(10, 5))
# plt.plot(meat_df['year-month'], meat_df['expanding_correlation'], label='Cali')
# plt.xlabel('Year')
# plt.ylabel('Below Poverty Line (%)')
# plt.legend()
# plt.grid(True)
# plt.show()
#x = meat_df['expanding_correlation']
# plt.plot()

In [308]:
# Persist the combined table. With to_csv's default index=True the row index
# is written as the first CSV column.
meat_df.to_csv('../udataset/meat_poverty_data.csv')

In [309]:
# Allocate an empty square table for the p-values: one row and one column for
# every meat_df column after the first.
metric_columns = meat_df.columns[1:]
p_values = pd.DataFrame(index=metric_columns, columns=metric_columns)

In [310]:
# Fill `p_values` with the Pearson-correlation p-value of every pair of
# columns after the first one in `meat_df`.
#
# * `np.nan` replaces the `np.NaN` alias, which was removed in NumPy 2.0 and
#   raises AttributeError there.
# * Pearson correlation is symmetric, so each unordered pair is computed once
#   and mirrored into both cells, halving the number of pearsonr calls.
paired_columns = list(meat_df.columns[1:])
for position, col1 in enumerate(paired_columns):
    # A column's p-value against itself is not meaningful; leave it missing.
    p_values.at[col1, col1] = np.nan
    for col2 in paired_columns[position + 1:]:
        _, p_value = pearsonr(meat_df[col1], meat_df[col2])
        p_values.at[col1, col2] = p_value
        p_values.at[col2, col1] = p_value

In [311]:
# Persist the p-value matrix. The row labels (column names of meat_df) are
# written as the first CSV column.
p_values.to_csv('../udataset/p_values_state_poverty.csv')