# Setup

In [373]:
import pandas as pd
import os

SAVE_DIR = '../udataset/meat/'
EXTRA_COLUMNS = ['Red Meat', 'Poultry']
os.makedirs(SAVE_DIR, exist_ok=True)

Define a function for fixing the datetime in the datasets

In [374]:
def fix_datetime(df: pd.DataFrame, year_col: str = 'Year', month_col: str = 'Month') -> pd.DataFrame:
    # Add date in correct format
    df[year_col] = df[year_col].astype(str)
    df[month_col] = df[month_col].astype(str)
    df['DateTime'] = df[year_col] + '-' + df[month_col].str.rjust(2, '0')

    # Reorder the columns
    cols = df.columns.tolist()
    cols = [cols[-1]] + cols[:-1]
    df = df[cols]

    # Drop old columns
    df = df.drop(columns=['Date', 'Year', 'Month'])
    return df

## Meat Stats Cold Storage

Load dataset

In [375]:
storage_df = pd.read_csv("../dataset/Meat_Stats_Cold_Storage.csv")

Clean the dataset

In [376]:
# Fix the dates in the dataset
storage_df = fix_datetime(storage_df)

# Convert unit
storage_df = storage_df.drop(columns=['Unit'])
storage_df['Weight'] = storage_df['Weight'] * 1e6

# Remove rows with no weight
storage_df = storage_df[~storage_df['Weight'].isnull()]
storage_df['Animal'] = storage_df['Animal'].replace('Frozen Eggs', 'Frozen Egg')

Get which animals are red meat or poultry

In [377]:
RED_MEAT = list(storage_df[storage_df['Type_Of_Meat'] == 'Red Meat']['Animal'].unique())
POULTRY = list(storage_df[storage_df['Type_Of_Meat'] == 'Poultry']['Animal'].unique())

Pivot the dataset into columns

In [378]:
storage_df = storage_df.pivot_table(
    index='DateTime',
    columns='Animal',
    values='Weight',
    aggfunc='sum'
)

Add red meat and poultry summative columns

In [379]:
storage_df['Red Meat'] = storage_df[RED_MEAT].sum(axis=1)
storage_df['Poultry'] = storage_df[POULTRY].sum(axis=1)
storage_df['Total'] = storage_df[RED_MEAT + POULTRY].sum(axis=1)

Reorder the dataset

In [380]:
storage_df = storage_df[RED_MEAT + POULTRY + EXTRA_COLUMNS]

Save the dataset

In [381]:
storage_df.reset_index(inplace=True)
storage_df.to_csv(SAVE_DIR + "cold_storage.csv", index=False)
storage_df

Animal,DateTime,Beef,Veal,Pork,Lamb and Mutton,Broiler,Other Chicken,Turkey,Frozen Egg,Red Meat,Poultry
0,1983-01,294380000.0,7360000.0,219020000.0,8650000.0,,18000000.0,203910000.0,,529410000.0,2.219100e+08
1,1983-02,303210000.0,7260000.0,224150000.0,7680000.0,,19870000.0,193830000.0,,542300000.0,2.137000e+08
2,1983-03,307050000.0,7570000.0,215820000.0,7670000.0,,17280000.0,187690000.0,,538110000.0,2.049700e+08
3,1983-04,299080000.0,6770000.0,234740000.0,8220000.0,,18790000.0,185330000.0,,548810000.0,2.041200e+08
4,1983-05,277330000.0,7250000.0,272720000.0,8330000.0,,23180000.0,192270000.0,,565630000.0,2.154500e+08
...,...,...,...,...,...,...,...,...,...,...,...
487,2023-08,410460000.0,1170000.0,470760000.0,27400000.0,810060000.0,8280000.0,439790000.0,29400000.0,909790000.0,1.287530e+09
488,2023-09,395400000.0,1150000.0,469240000.0,26170000.0,793450000.0,9250000.0,446880000.0,32210000.0,891960000.0,1.281790e+09
489,2023-10,420850000.0,1300000.0,461660000.0,26160000.0,795130000.0,8590000.0,421230000.0,30180000.0,909970000.0,1.255130e+09
490,2023-11,445670000.0,940000.0,437900000.0,26130000.0,822630000.0,7030000.0,319650000.0,31460000.0,910640000.0,1.180770e+09


# Meat Stats Meat Production

Load the dataset

In [382]:
production_df = pd.read_csv("../dataset/Meat_Stats_Meat_Production.csv")

Clean the dataset

In [383]:
# Fix the dates in the dataset
production_df = fix_datetime(production_df)

# Convert unit
production_df = production_df.drop(columns=['Unit'])
production_df['Production'] = production_df['Production'].str.replace(',', '').astype(float) * 1e6

# Remove rows with no production
production_df = production_df[~production_df['Production'].isnull()]
production_df['Animal'] = production_df['Animal'].replace('Broilers', 'Broiler')

Only consider federally inspected data

In [384]:
production_df = production_df[production_df['Commercial or Federally Inspected'] == 'Federally Inspected']

Get which animals are red meat or poultry

In [385]:
RED_MEAT = list(production_df[production_df['Type of Meat'] == 'Red Meat']['Animal'].unique())
POULTRY = list(production_df[production_df['Type of Meat'] == 'Poultry']['Animal'].unique())

Pivot the dataset into columns

In [386]:
production_df = production_df.pivot_table(
    index='DateTime',
    columns='Animal',
    values='Production',
    aggfunc='sum'
)

Add red meat and poultry summative columns

In [387]:
production_df['Red Meat'] = production_df[RED_MEAT].sum(axis=1)
production_df['Poultry'] = production_df[POULTRY].sum(axis=1)
production_df['Total'] = production_df[RED_MEAT + POULTRY].sum(axis=1)

Reorder the dataset

In [388]:
production_df = production_df[RED_MEAT + POULTRY + EXTRA_COLUMNS]

Save the dataset

In [389]:
production_df.reset_index(inplace=True)
production_df.to_csv(SAVE_DIR + "production.csv", index=False)
production_df

Animal,DateTime,Beef,Veal,Pork,Lamb and Mutton,Broiler,Other Chicken,Turkey,Red Meat,Poultry
0,1921-01,3.580000e+08,27000000.0,5.650000e+08,44000000.0,,,393000000.0,9.940000e+08,3.930000e+08
1,1921-02,2.790000e+08,24000000.0,4.840000e+08,39000000.0,,,,8.260000e+08,0.000000e+00
2,1921-03,3.400000e+08,31000000.0,3.720000e+08,45000000.0,,,,7.880000e+08,0.000000e+00
3,1921-04,3.160000e+08,29000000.0,3.740000e+08,41000000.0,,,,7.600000e+08,0.000000e+00
4,1921-05,3.190000e+08,31000000.0,3.960000e+08,37000000.0,,,,7.830000e+08,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...
1207,2023-08,2.329500e+09,4100000.0,2.282400e+09,8900000.0,4.156700e+09,51100000.0,433500000.0,4.624900e+09,4.641300e+09
1208,2023-09,2.114600e+09,3500000.0,2.175500e+09,8600000.0,3.805500e+09,48400000.0,489200000.0,4.302200e+09,4.343100e+09
1209,2023-10,2.300800e+09,4000000.0,2.406200e+09,9800000.0,4.177300e+09,50600000.0,431200000.0,4.720800e+09,4.659100e+09
1210,2023-11,2.247300e+09,4100000.0,2.377400e+09,10100000.0,3.817300e+09,44500000.0,500300000.0,4.638900e+09,4.362100e+09


# Meat Stats Slaughter Counts

In [390]:
slaughter_df = pd.read_csv("../dataset/Meat_Stats_Slaughter_Counts.csv")

In [391]:
# Fix the dates in the dataset
slaughter_df = fix_datetime(slaughter_df)

# Convert unit
slaughter_df = slaughter_df.drop(columns=['Unit'])
slaughter_df['Count'] = slaughter_df['Count'].str.replace(',', '').astype(float) * 1e3
slaughter_df['Animal'] = slaughter_df['Animal'].replace('Broilers', 'Broiler')
slaughter_df

Unnamed: 0,DateTime,Animal,Count,Commercial_Or_Federally_Inspected,Type_Of_Meat
0,2023-12,Cattle,2593400.0,Commercial,Red Meat
1,2023-11,Cattle,2729900.0,Commercial,Red Meat
2,2023-10,Cattle,2826000.0,Commercial,Red Meat
3,2023-09,Cattle,2610600.0,Commercial,Red Meat
4,2023-08,Cattle,2890000.0,Commercial,Red Meat
...,...,...,...,...,...
15278,1983-04,Turkeys,10563000.0,Federally Inspected,Poultry
15279,1983-03,Turkeys,11979000.0,Federally Inspected,Poultry
15280,1983-02,Turkeys,8477000.0,Federally Inspected,Poultry
15281,1983-01,Turkeys,8532000.0,Federally Inspected,Poultry


Only consider federally inspected data

In [392]:
slaughter_df = slaughter_df[slaughter_df['Commercial_Or_Federally_Inspected'] == 'Federally Inspected']

Get which animals are red meat or poultry

In [393]:
RED_MEAT = list(slaughter_df[slaughter_df['Type_Of_Meat'] == 'Red Meat']['Animal'].unique())
POULTRY = list(slaughter_df[slaughter_df['Type_Of_Meat'] == 'Poultry']['Animal'].unique())

Pivot the dataset into columns

In [394]:
slaughter_df = slaughter_df.pivot_table(
    index='DateTime',
    columns='Animal',
    values='Count',
    aggfunc='sum'
)

In [395]:
slaughter_df['Red Meat'] = slaughter_df[RED_MEAT].sum(axis=1)
slaughter_df['Poultry'] = slaughter_df[POULTRY].sum(axis=1)
slaughter_df['Total'] = slaughter_df[RED_MEAT + POULTRY].sum(axis=1)

slaughter_df = slaughter_df[slaughter_df['Other chickens'] != 0]

In [396]:
slaughter_df

Animal,Barrows and Gilts,Beef Cows,Boars and Stags,Broiler,Bulls and Stags,Calves,Cattle,Dairy Cows,Heifers,Hogs,Lambs and Yearlings,Mature Sheep,Other chickens,Sheep and Lambs,Sows,Steers,Turkeys,Red Meat,Poultry,Total
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2001-01,8220600.0,270300.0,28200.0,710683000.0,50200.0,89300.0,2947100.0,269300.0,961200.0,8520600.0,246800.0,11600.0,13999000.0,258400.0,271800.0,1396100.0,21303000.0,23541500.0,745985000.0,769526500.0
2001-02,7242500.0,226000.0,25600.0,627380000.0,44700.0,77200.0,2532500.0,220200.0,853500.0,7491100.0,226200.0,9800.0,12734000.0,236000.0,223000.0,1188000.0,19278000.0,20596300.0,659392000.0,679988300.0
2001-03,7910800.0,243800.0,30500.0,700816000.0,53100.0,82400.0,2866900.0,241900.0,936400.0,8207200.0,303500.0,12600.0,14302000.0,316100.0,265900.0,1391800.0,21790000.0,22862900.0,736908000.0,759770900.0
2001-04,7448400.0,228600.0,28600.0,675819000.0,48400.0,72400.0,2666700.0,202500.0,868800.0,7721900.0,263100.0,12200.0,13041000.0,275400.0,244900.0,1318300.0,20463000.0,21400200.0,709323000.0,730723200.0
2001-05,7549900.0,270700.0,28300.0,755640000.0,57100.0,76600.0,3152000.0,207800.0,1009900.0,7836300.0,214300.0,13000.0,13722000.0,227300.0,258100.0,1606500.0,23412000.0,22507800.0,792774000.0,815281800.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08,10735900.0,281000.0,27700.0,851241000.0,51500.0,25900.0,2840800.0,275200.0,874000.0,11057300.0,141100.0,9800.0,10466000.0,150900.0,293800.0,1359200.0,19951000.0,28124100.0,881658000.0,909782100.0
2023-09,10145500.0,275800.0,26400.0,761055000.0,48400.0,21000.0,2559800.0,240500.0,788600.0,10430800.0,133200.0,10600.0,9680000.0,143800.0,259000.0,1206500.0,17209000.0,26289900.0,787944000.0,814233900.0
2023-10,11064600.0,330700.0,26300.0,824609000.0,50000.0,22300.0,2767700.0,242900.0,870000.0,11366600.0,152900.0,10400.0,10409000.0,163300.0,275800.0,1274100.0,20529000.0,28617600.0,855547000.0,884164600.0
2023-11,10799800.0,341000.0,24800.0,761801000.0,44400.0,23400.0,2681300.0,229700.0,856900.0,11082600.0,158000.0,10400.0,9260000.0,168400.0,257900.0,1209200.0,18530000.0,27887800.0,789591000.0,817478800.0


Reorder the dataset

In [397]:
slaughter_df = slaughter_df[RED_MEAT + POULTRY + EXTRA_COLUMNS]

In [398]:
slaughter_df.reset_index(inplace=True)
slaughter_df.to_csv(SAVE_DIR + "slaughter.csv", index=False)
slaughter_df

Animal,DateTime,Cattle,Steers,Heifers,Beef Cows,Dairy Cows,Bulls and Stags,Calves,Hogs,Barrows and Gilts,Sows,Boars and Stags,Sheep and Lambs,Lambs and Yearlings,Mature Sheep,Broiler,Other chickens,Turkeys,Red Meat,Poultry
0,2001-01,2947100.0,1396100.0,961200.0,270300.0,269300.0,50200.0,89300.0,8520600.0,8220600.0,271800.0,28200.0,258400.0,246800.0,11600.0,710683000.0,13999000.0,21303000.0,23541500.0,745985000.0
1,2001-02,2532500.0,1188000.0,853500.0,226000.0,220200.0,44700.0,77200.0,7491100.0,7242500.0,223000.0,25600.0,236000.0,226200.0,9800.0,627380000.0,12734000.0,19278000.0,20596300.0,659392000.0
2,2001-03,2866900.0,1391800.0,936400.0,243800.0,241900.0,53100.0,82400.0,8207200.0,7910800.0,265900.0,30500.0,316100.0,303500.0,12600.0,700816000.0,14302000.0,21790000.0,22862900.0,736908000.0
3,2001-04,2666700.0,1318300.0,868800.0,228600.0,202500.0,48400.0,72400.0,7721900.0,7448400.0,244900.0,28600.0,275400.0,263100.0,12200.0,675819000.0,13041000.0,20463000.0,21400200.0,709323000.0
4,2001-05,3152000.0,1606500.0,1009900.0,270700.0,207800.0,57100.0,76600.0,7836300.0,7549900.0,258100.0,28300.0,227300.0,214300.0,13000.0,755640000.0,13722000.0,23412000.0,22507800.0,792774000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,2023-08,2840800.0,1359200.0,874000.0,281000.0,275200.0,51500.0,25900.0,11057300.0,10735900.0,293800.0,27700.0,150900.0,141100.0,9800.0,851241000.0,10466000.0,19951000.0,28124100.0,881658000.0
272,2023-09,2559800.0,1206500.0,788600.0,275800.0,240500.0,48400.0,21000.0,10430800.0,10145500.0,259000.0,26400.0,143800.0,133200.0,10600.0,761055000.0,9680000.0,17209000.0,26289900.0,787944000.0
273,2023-10,2767700.0,1274100.0,870000.0,330700.0,242900.0,50000.0,22300.0,11366600.0,11064600.0,275800.0,26300.0,163300.0,152900.0,10400.0,824609000.0,10409000.0,20529000.0,28617600.0,855547000.0
274,2023-11,2681300.0,1209200.0,856900.0,341000.0,229700.0,44400.0,23400.0,11082600.0,10799800.0,257900.0,24800.0,168400.0,158000.0,10400.0,761801000.0,9260000.0,18530000.0,27887800.0,789591000.0


# Meat Stats Slaughter Weights

Load the dataset

In [399]:
weights_df = pd.read_csv("../dataset/Meat_Stats_Slaughter_Weights.csv")

Clean the dataset

In [400]:
# Fix the dates in the dataset
weights_df = fix_datetime(weights_df)

# Convert unit
weights_df = weights_df.drop(columns=['Unit'])
weights_df

# Make the animals non-plural
weights_df['Animal'] = weights_df['Animal'].apply(lambda x: x[:-1] if x.endswith('s') else x)

weights_df['Weight'] = weights_df['Weight'].str.replace(',', '').astype(float)

Only consider federally inspected data

In [401]:
weights_df = weights_df[weights_df['Commercial_Or_Federally_Inspected'] == 'Federally Inspected']
weights_df = weights_df.drop(columns=['Commercial_Or_Federally_Inspected'])

Get which animals are red meat or poultry

In [402]:
filtered_df = weights_df[weights_df['Live_Or_Dressed'] == 'Dressed']
filtered_df = filtered_df.drop(columns=['Live_Or_Dressed'])

In [403]:
RED_MEAT = list(filtered_df[filtered_df['Type_Of_Meat'] == 'Red Meat']['Animal'].unique())
POULTRY = list(filtered_df[filtered_df['Type_Of_Meat'] == 'Poultry']['Animal'].unique())

Pivot the dataset into columns

In [404]:
filtered_df = filtered_df.pivot_table(
    index='DateTime',
    columns='Animal',
    values='Weight',
    aggfunc='sum'
)

Reorder the dataset

In [405]:
filtered_df = filtered_df[RED_MEAT + POULTRY]

In [406]:
filtered_df.reset_index(inplace=True)
filtered_df.to_csv(SAVE_DIR + "weights.csv", index=False)
filtered_df

Animal,DateTime,Cattle,Steer,Heifer,Cow,Bulls and Stag,Calve,Hog,Sheep and Lamb
0,1983-01,637.0,706.0,611.0,514.0,754.0,127.0,175.0,57.0
1,1983-02,641.0,701.0,607.0,525.0,758.0,132.0,172.0,58.0
2,1983-03,642.0,699.0,607.0,528.0,764.0,132.0,173.0,58.0
3,1983-04,633.0,692.0,597.0,520.0,759.0,131.0,174.0,58.0
4,1983-05,635.0,694.0,603.0,511.0,763.0,140.0,176.0,57.0
...,...,...,...,...,...,...,...,...,...
487,2023-08,822.0,903.0,818.0,621.0,876.0,159.0,206.0,59.0
488,2023-09,828.0,918.0,827.0,613.0,874.0,168.0,209.0,60.0
489,2023-10,833.0,926.0,840.0,613.0,858.0,182.0,212.0,60.0
490,2023-11,840.0,935.0,851.0,619.0,851.0,178.0,215.0,60.0


In [407]:
weights_df

Unnamed: 0,DateTime,Animal,Weight,Live_Or_Dressed,Type_Of_Meat
1968,2023-12,Cattle,1404.0,Live,Red Meat
1969,2023-11,Cattle,1390.0,Live,Red Meat
1970,2023-10,Cattle,1378.0,Live,Red Meat
1971,2023-09,Cattle,1365.0,Live,Red Meat
1972,2023-08,Cattle,1353.0,Live,Red Meat
...,...,...,...,...,...
9343,1983-05,Sheep and Lamb,57.0,Dressed,Red Meat
9344,1983-04,Sheep and Lamb,58.0,Dressed,Red Meat
9345,1983-03,Sheep and Lamb,58.0,Dressed,Red Meat
9346,1983-02,Sheep and Lamb,58.0,Dressed,Red Meat


In [409]:
live_df = weights_df[weights_df.Live_Or_Dressed == 'Live']
live_animals = set(live_df['Animal'].unique())

In [410]:
dressed_df = weights_df[weights_df.Live_Or_Dressed == 'Dressed']
dressed_animals = set(dressed_df['Animal'].unique())

In [411]:
both = live_animals & dressed_animals
both

{'Calve', 'Cattle', 'Hog', 'Sheep and Lamb'}