# Setup

In [288]:
import pandas as pd
import os

SAVE_DIR = '../udataset/meat/'
EXTRA_COLUMNS = ['Red Meat', 'Poultry']
os.makedirs(SAVE_DIR, exist_ok=True)

Define a function for fixing the datetime in the datasets

In [289]:
def fix_datetime(df: pd.DataFrame, year_col: str = 'Year', month_col: str = 'Month') -> pd.DataFrame:
    # Add date in correct format
    df[year_col] = df[year_col].astype(str)
    df[month_col] = df[month_col].astype(str)
    df['DateTime'] = df[year_col] + '-' + df[month_col].str.rjust(2, '0')

    # Reorder the columns
    cols = df.columns.tolist()
    cols = [cols[-1]] + cols[:-1]
    df = df[cols]

    # Drop old columns
    df = df.drop(columns=['Date', 'Year', 'Month'])
    return df

## Meat Stats Cold Storage

Load dataset

In [290]:
storage_df = pd.read_csv("../dataset/Meat_Stats_Cold_Storage.csv")

Clean the dataset

In [291]:
# Fix the dates in the dataset
storage_df = fix_datetime(storage_df)

# Convert unit
storage_df = storage_df.drop(columns=['Unit'])
storage_df['Weight'] = storage_df['Weight'] * 1e6

# Remove rows with no weight
storage_df = storage_df[~storage_df['Weight'].isnull()]
storage_df['Animal'] = storage_df['Animal'].replace('Frozen Eggs', 'Frozen Egg')

Get which animals are red meat or poultry

In [292]:
RED_MEAT = list(storage_df[storage_df['Type_Of_Meat'] == 'Red Meat']['Animal'].unique())
POULTRY = list(storage_df[storage_df['Type_Of_Meat'] == 'Poultry']['Animal'].unique())

Pivot the dataset into columns

In [293]:
storage_df = storage_df.pivot_table(
    index='DateTime',
    columns='Animal',
    values='Weight',
    aggfunc='sum'
)

Drop empty rows and add red meat and poultry summative columns

In [294]:
storage_df = storage_df.dropna()

storage_df['Red Meat'] = storage_df[RED_MEAT].sum(axis=1)
storage_df['Poultry'] = storage_df[POULTRY].sum(axis=1)

Reorder the dataset

In [295]:
storage_df = storage_df[RED_MEAT + POULTRY + EXTRA_COLUMNS]

Save the dataset

In [296]:
storage_df.reset_index(inplace=True)
storage_df.to_csv(SAVE_DIR + "cold_storage.csv", index=False)
storage_df

Animal,DateTime,Beef,Veal,Pork,Lamb and Mutton,Broiler,Other Chicken,Turkey,Frozen Egg,Red Meat,Poultry
0,2003-06,385120000.0,5100000.0,499720000.0,5840000.0,672030000.0,4980000.0,658840000.0,17700000.0,895780000.0,1.353550e+09
1,2003-07,371480000.0,4070000.0,460010000.0,5430000.0,639610000.0,3510000.0,718210000.0,17950000.0,840990000.0,1.379280e+09
2,2003-08,368250000.0,3800000.0,440650000.0,5930000.0,621270000.0,3740000.0,722540000.0,18570000.0,818630000.0,1.366120e+09
3,2003-09,370990000.0,4170000.0,430240000.0,5860000.0,601010000.0,5730000.0,706500000.0,18040000.0,811260000.0,1.331280e+09
4,2003-10,379830000.0,4600000.0,435150000.0,6210000.0,590660000.0,5200000.0,647540000.0,16640000.0,825790000.0,1.260040e+09
...,...,...,...,...,...,...,...,...,...,...,...
242,2023-08,410460000.0,1170000.0,470760000.0,27400000.0,810060000.0,8280000.0,439790000.0,29400000.0,909790000.0,1.287530e+09
243,2023-09,395400000.0,1150000.0,469240000.0,26170000.0,793450000.0,9250000.0,446880000.0,32210000.0,891960000.0,1.281790e+09
244,2023-10,420850000.0,1300000.0,461660000.0,26160000.0,795130000.0,8590000.0,421230000.0,30180000.0,909970000.0,1.255130e+09
245,2023-11,445670000.0,940000.0,437900000.0,26130000.0,822630000.0,7030000.0,319650000.0,31460000.0,910640000.0,1.180770e+09


# Meat Stats Meat Production

Load the dataset

In [297]:
production_df = pd.read_csv("../dataset/Meat_Stats_Meat_Production.csv")

Clean the dataset

In [298]:
# Fix the dates in the dataset
production_df = fix_datetime(production_df)

# Convert unit
production_df = production_df.drop(columns=['Unit'])
production_df['Production'] = production_df['Production'].str.replace(',', '').astype(float) * 1e6

# Remove rows with no production
production_df = production_df[~production_df['Production'].isnull()]
production_df['Animal'] = production_df['Animal'].replace('Broilers', 'Broiler')

Only consider federally inspected data

In [299]:
production_df = production_df[production_df['Commercial or Federally Inspected'] == 'Federally Inspected']

Get which animals are red meat or poultry

In [300]:
RED_MEAT = list(production_df[production_df['Type of Meat'] == 'Red Meat']['Animal'].unique())
POULTRY = list(production_df[production_df['Type of Meat'] == 'Poultry']['Animal'].unique())

Pivot the dataset into columns

In [301]:
production_df = production_df.pivot_table(
    index='DateTime',
    columns='Animal',
    values='Production',
    aggfunc='sum'
)

Drop empty rows and add red meat and poultry summative columns

In [302]:
production_df = production_df.dropna()
production_df['Red Meat'] = production_df[RED_MEAT].sum(axis=1)
production_df['Poultry'] = production_df[POULTRY].sum(axis=1)

Reorder the dataset

In [303]:
production_df = production_df[RED_MEAT + POULTRY + EXTRA_COLUMNS]

Save the dataset

In [304]:
production_df.reset_index(inplace=True)
production_df.to_csv(SAVE_DIR + "production.csv", index=False)
production_df

Animal,DateTime,Beef,Veal,Pork,Lamb and Mutton,Broiler,Other Chicken,Turkey,Red Meat,Poultry
0,2001-01,2.172000e+09,17000000.0,1.672000e+09,18000000.0,2.622200e+09,42700000.0,403400000.0,3.879000e+09,3.068300e+09
1,2001-02,1.852000e+09,15000000.0,1.467000e+09,17000000.0,2.322200e+09,39700000.0,461200000.0,3.351000e+09,2.823100e+09
2,2001-03,2.065000e+09,16000000.0,1.606000e+09,23000000.0,2.588600e+09,44400000.0,409300000.0,3.710000e+09,3.042300e+09
3,2001-04,1.910000e+09,15000000.0,1.514000e+09,19000000.0,2.515700e+09,42200000.0,462000000.0,3.458000e+09,3.019900e+09
4,2001-05,2.265000e+09,15000000.0,1.535000e+09,16000000.0,2.835600e+09,45600000.0,428800000.0,3.831000e+09,3.310000e+09
...,...,...,...,...,...,...,...,...,...,...
271,2023-08,2.329500e+09,4100000.0,2.282400e+09,8900000.0,4.156700e+09,51100000.0,433500000.0,4.624900e+09,4.641300e+09
272,2023-09,2.114600e+09,3500000.0,2.175500e+09,8600000.0,3.805500e+09,48400000.0,489200000.0,4.302200e+09,4.343100e+09
273,2023-10,2.300800e+09,4000000.0,2.406200e+09,9800000.0,4.177300e+09,50600000.0,431200000.0,4.720800e+09,4.659100e+09
274,2023-11,2.247300e+09,4100000.0,2.377400e+09,10100000.0,3.817300e+09,44500000.0,500300000.0,4.638900e+09,4.362100e+09


# Meat Stats Slaughter Counts

In [305]:
slaughter_df = pd.read_csv("../dataset/Meat_Stats_Slaughter_Counts.csv")

In [306]:
# Fix the dates in the dataset
slaughter_df = fix_datetime(slaughter_df)

# Convert unit
slaughter_df = slaughter_df.drop(columns=['Unit'])
slaughter_df['Count'] = slaughter_df['Count'].str.replace(',', '').astype(float) * 1e3
slaughter_df['Animal'] = slaughter_df['Animal'].replace('Broilers', 'Broiler')
slaughter_df

Unnamed: 0,DateTime,Animal,Count,Commercial_Or_Federally_Inspected,Type_Of_Meat
0,2023-12,Cattle,2593400.0,Commercial,Red Meat
1,2023-11,Cattle,2729900.0,Commercial,Red Meat
2,2023-10,Cattle,2826000.0,Commercial,Red Meat
3,2023-09,Cattle,2610600.0,Commercial,Red Meat
4,2023-08,Cattle,2890000.0,Commercial,Red Meat
...,...,...,...,...,...
15278,1983-04,Turkeys,10563000.0,Federally Inspected,Poultry
15279,1983-03,Turkeys,11979000.0,Federally Inspected,Poultry
15280,1983-02,Turkeys,8477000.0,Federally Inspected,Poultry
15281,1983-01,Turkeys,8532000.0,Federally Inspected,Poultry


Only consider federally inspected data

In [307]:
slaughter_df = slaughter_df[slaughter_df['Commercial_Or_Federally_Inspected'] == 'Federally Inspected']

Get which animals are red meat or poultry

In [308]:
RED_MEAT = list(slaughter_df[slaughter_df['Type_Of_Meat'] == 'Red Meat']['Animal'].unique())
POULTRY = list(slaughter_df[slaughter_df['Type_Of_Meat'] == 'Poultry']['Animal'].unique())

Pivot the dataset into columns

In [309]:
slaughter_df = slaughter_df.pivot_table(
    index='DateTime',
    columns='Animal',
    values='Count',
    aggfunc='sum'
)

In [310]:
slaughter_df = slaughter_df.dropna()
slaughter_df['Red Meat'] = slaughter_df[RED_MEAT].sum(axis=1)
slaughter_df['Poultry'] = slaughter_df[POULTRY].sum(axis=1)

slaughter_df = slaughter_df[slaughter_df['Other chickens'] != 0]

In [311]:
slaughter_df

Animal,Barrows and Gilts,Beef Cows,Boars and Stags,Broiler,Bulls and Stags,Calves,Cattle,Dairy Cows,Heifers,Hogs,Lambs and Yearlings,Mature Sheep,Other chickens,Sheep and Lambs,Sows,Steers,Turkeys,Red Meat,Poultry
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1982-12,18872000.0,342000.0,211000.0,324325000.0,194000.0,725800.0,8762000.0,343000.0,2659000.0,20068200.0,1511000.0,123000.0,0.0,1634100.0,986000.0,3890000.0,11725000.0,60321100.0,336050000.0
1983-01,6050000.0,306000.0,71000.0,340252000.0,59000.0,220900.0,2893800.0,307000.0,871000.0,6421200.0,481000.0,28000.0,0.0,509400.0,300000.0,1351000.0,8532000.0,19869300.0,348784000.0
1983-02,5455000.0,240000.0,65000.0,313680000.0,54000.0,203500.0,2553900.0,240000.0,743000.0,5762500.0,430000.0,28000.0,0.0,457400.0,243000.0,1277000.0,8477000.0,17752300.0,322157000.0
1983-03,6994000.0,259000.0,76000.0,368250000.0,65000.0,246100.0,2827600.0,260000.0,831000.0,7350300.0,583000.0,33000.0,0.0,616100.0,280000.0,1412000.0,11979000.0,21833100.0,380229000.0
1983-04,6728000.0,252000.0,76000.0,345740000.0,61000.0,202000.0,2614800.0,253000.0,727000.0,7085900.0,471000.0,38000.0,0.0,509000.0,282000.0,1323000.0,10563000.0,20622700.0,356303000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08,10735900.0,281000.0,27700.0,851241000.0,51500.0,25900.0,2840800.0,275200.0,874000.0,11057300.0,141100.0,9800.0,10466000.0,150900.0,293800.0,1359200.0,19951000.0,28124100.0,881658000.0
2023-09,10145500.0,275800.0,26400.0,761055000.0,48400.0,21000.0,2559800.0,240500.0,788600.0,10430800.0,133200.0,10600.0,9680000.0,143800.0,259000.0,1206500.0,17209000.0,26289900.0,787944000.0
2023-10,11064600.0,330700.0,26300.0,824609000.0,50000.0,22300.0,2767700.0,242900.0,870000.0,11366600.0,152900.0,10400.0,10409000.0,163300.0,275800.0,1274100.0,20529000.0,28617600.0,855547000.0
2023-11,10799800.0,341000.0,24800.0,761801000.0,44400.0,23400.0,2681300.0,229700.0,856900.0,11082600.0,158000.0,10400.0,9260000.0,168400.0,257900.0,1209200.0,18530000.0,27887800.0,789591000.0


Reorder the dataset

In [312]:
slaughter_df = slaughter_df[RED_MEAT + POULTRY + EXTRA_COLUMNS]

In [313]:
slaughter_df.reset_index(inplace=True)
slaughter_df.to_csv(SAVE_DIR + "slaughter.csv", index=False)
slaughter_df

Animal,DateTime,Cattle,Steers,Heifers,Beef Cows,Dairy Cows,Bulls and Stags,Calves,Hogs,Barrows and Gilts,Sows,Boars and Stags,Sheep and Lambs,Lambs and Yearlings,Mature Sheep,Broiler,Other chickens,Turkeys,Red Meat,Poultry
0,1982-12,8762000.0,3890000.0,2659000.0,342000.0,343000.0,194000.0,725800.0,20068200.0,18872000.0,986000.0,211000.0,1634100.0,1511000.0,123000.0,324325000.0,0.0,11725000.0,60321100.0,336050000.0
1,1983-01,2893800.0,1351000.0,871000.0,306000.0,307000.0,59000.0,220900.0,6421200.0,6050000.0,300000.0,71000.0,509400.0,481000.0,28000.0,340252000.0,0.0,8532000.0,19869300.0,348784000.0
2,1983-02,2553900.0,1277000.0,743000.0,240000.0,240000.0,54000.0,203500.0,5762500.0,5455000.0,243000.0,65000.0,457400.0,430000.0,28000.0,313680000.0,0.0,8477000.0,17752300.0,322157000.0
3,1983-03,2827600.0,1412000.0,831000.0,259000.0,260000.0,65000.0,246100.0,7350300.0,6994000.0,280000.0,76000.0,616100.0,583000.0,33000.0,368250000.0,0.0,11979000.0,21833100.0,380229000.0
4,1983-04,2614800.0,1323000.0,727000.0,252000.0,253000.0,61000.0,202000.0,7085900.0,6728000.0,282000.0,76000.0,509000.0,471000.0,38000.0,345740000.0,0.0,10563000.0,20622700.0,356303000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
488,2023-08,2840800.0,1359200.0,874000.0,281000.0,275200.0,51500.0,25900.0,11057300.0,10735900.0,293800.0,27700.0,150900.0,141100.0,9800.0,851241000.0,10466000.0,19951000.0,28124100.0,881658000.0
489,2023-09,2559800.0,1206500.0,788600.0,275800.0,240500.0,48400.0,21000.0,10430800.0,10145500.0,259000.0,26400.0,143800.0,133200.0,10600.0,761055000.0,9680000.0,17209000.0,26289900.0,787944000.0
490,2023-10,2767700.0,1274100.0,870000.0,330700.0,242900.0,50000.0,22300.0,11366600.0,11064600.0,275800.0,26300.0,163300.0,152900.0,10400.0,824609000.0,10409000.0,20529000.0,28617600.0,855547000.0
491,2023-11,2681300.0,1209200.0,856900.0,341000.0,229700.0,44400.0,23400.0,11082600.0,10799800.0,257900.0,24800.0,168400.0,158000.0,10400.0,761801000.0,9260000.0,18530000.0,27887800.0,789591000.0
