# Setup

In [1]:
import pandas as pd
import functools as ft

# Standard name for the meat-category column; the cleaning cells rename each dataset's own column to this
TYPE_OF_MEAT = 'Type of Meat'

Define a function for fixing the datetime in the datasets

In [2]:
def fix_datetime(df: pd.DataFrame, year_col: str = 'Year', month_col: str = 'Month') -> pd.DataFrame:
    """Return a copy of ``df`` with a leading ``DateTime`` column in ``YYYY-MM`` form.

    The year and month columns are combined into a zero-padded ``YYYY-MM``
    string that becomes the first column of the result. The year and month
    columns, and the ``Date`` column when present, are removed from the
    result. The frame passed in is not modified.

    Args:
        df: Frame holding separate year and month columns.
        year_col: Name of the column containing the year.
        month_col: Name of the column containing the month number.

    Returns:
        A new DataFrame with ``DateTime`` first, followed by the remaining columns.

    Raises:
        KeyError: If ``year_col`` or ``month_col`` is not a column of ``df``.
    """
    # Build the YYYY-MM key from local Series so the caller's frame is untouched
    year = df[year_col].astype(str)
    month = df[month_col].astype(str).str.rjust(2, '0')
    date_time = year + '-' + month

    # Drop the columns made redundant by DateTime. 'Date' is optional, and a
    # pre-existing 'DateTime' is replaced rather than duplicated.
    redundant = [year_col, month_col, 'Date', 'DateTime']
    result = df.drop(columns=redundant, errors='ignore')

    # Put DateTime first
    result.insert(0, 'DateTime', date_time)
    return result

# Meat Stats Cold Storage

Load dataset

In [3]:
# Read the cold-storage CSV
storage_df = pd.read_csv(filepath_or_buffer="../dataset/Meat_Stats_Cold_Storage.csv")

Clean the dataset

In [4]:
# Clean in a single chain: build DateTime, drop Unit, scale Weight by 1e6,
# discard rows without a Weight, and standardise the type-of-meat column name
storage_df = (
    fix_datetime(storage_df)
    .drop(columns=['Unit'])
    .assign(Weight=lambda frame: frame['Weight'] * 1e6)
    .dropna(subset=['Weight'])
    .rename(columns={'Type_Of_Meat': TYPE_OF_MEAT})
)

In [5]:
# Preview the cleaned cold-storage frame
storage_df

Unnamed: 0,DateTime,Animal,Weight,Type of Meat
0,2023-12,Beef,457410000.0,Red Meat
1,2023-11,Beef,445670000.0,Red Meat
2,2023-10,Beef,420850000.0,Red Meat
3,2023-09,Beef,395400000.0,Red Meat
4,2023-08,Beef,410460000.0,Red Meat
...,...,...,...,...
3846,1990-06,Frozen Eggs,17390000.0,Poultry
3847,1990-05,Frozen Eggs,17290000.0,Poultry
3848,1990-04,Frozen Eggs,16780000.0,Poultry
3849,1990-03,Frozen Eggs,15180000.0,Poultry


# Meat Stats Meat Production

Load the dataset

In [6]:
# Read the meat-production CSV
production_df = pd.read_csv(filepath_or_buffer="../dataset/Meat_Stats_Meat_Production.csv")

Clean the dataset

In [7]:
# Clean in a single chain: build DateTime, drop Unit, parse Production
# (strip thousands separators, cast to float, scale by 1e6), discard rows
# without a Production value, and standardise the type-of-meat column name
production_df = (
    fix_datetime(production_df)
    .drop(columns=['Unit'])
    .assign(
        Production=lambda frame: frame['Production'].str.replace(',', '').astype(float) * 1e6
    )
    .dropna(subset=['Production'])
    .rename(columns={'Type of Meat': TYPE_OF_MEAT})
)

Only consider federally inspected data

In [8]:
# Keep only the rows marked as Federally Inspected
production_df = production_df.loc[
    lambda frame: frame['Commercial or Federally Inspected'] == 'Federally Inspected'
]

Pivot the dataset into columns

In [9]:
# One row per DateTime and one column per Animal, summing Production for each pair
production_df = pd.pivot_table(
    production_df,
    values='Production',
    index='DateTime',
    columns='Animal',
    aggfunc='sum',
)

Drop rows with missing values, then add Red Meat and Poultry total columns

In [None]:
# Discard rows with any missing value, then append row-wise totals per meat category
production_df = production_df.dropna().assign(**{
    'Red Meat': lambda frame: frame[['Beef', 'Veal', 'Pork', 'Lamb and Mutton']].sum(axis=1),
    'Poultry': lambda frame: frame[['Broilers', 'Other Chicken', 'Turkey']].sum(axis=1),
})

Save the dataset

In [10]:
# DateTime is the index of the pivoted frame, so the index has to be written
# out; with index=False the saved file would contain no date column at all.
# NOTE(review): confirm "../udataset" is the intended output directory (inputs are read from "../dataset").
production_df.to_csv("../udataset/meat_production.csv")
production_df

Animal,Beef,Broilers,Lamb and Mutton,Other Chicken,Pork,Turkey,Veal,Red Meat,Poultry
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2001-01,2.172000e+09,2.622200e+09,18000000.0,42700000.0,1.672000e+09,403400000.0,17000000.0,3.879000e+09,3.068300e+09
2001-02,1.852000e+09,2.322200e+09,17000000.0,39700000.0,1.467000e+09,461200000.0,15000000.0,3.351000e+09,2.823100e+09
2001-03,2.065000e+09,2.588600e+09,23000000.0,44400000.0,1.606000e+09,409300000.0,16000000.0,3.710000e+09,3.042300e+09
2001-04,1.910000e+09,2.515700e+09,19000000.0,42200000.0,1.514000e+09,462000000.0,15000000.0,3.458000e+09,3.019900e+09
2001-05,2.265000e+09,2.835600e+09,16000000.0,45600000.0,1.535000e+09,428800000.0,15000000.0,3.831000e+09,3.310000e+09
...,...,...,...,...,...,...,...,...,...
2023-08,2.329500e+09,4.156700e+09,8900000.0,51100000.0,2.282400e+09,433500000.0,4100000.0,4.624900e+09,4.641300e+09
2023-09,2.114600e+09,3.805500e+09,8600000.0,48400000.0,2.175500e+09,489200000.0,3500000.0,4.302200e+09,4.343100e+09
2023-10,2.300800e+09,4.177300e+09,9800000.0,50600000.0,2.406200e+09,431200000.0,4000000.0,4.720800e+09,4.659100e+09
2023-11,2.247300e+09,3.817300e+09,10100000.0,44500000.0,2.377400e+09,500300000.0,4100000.0,4.638900e+09,4.362100e+09


# Meat Stats Slaughter Counts

In [494]:
# Read the slaughter-count CSV and preview it
slaughter_df = pd.read_csv(filepath_or_buffer="../dataset/Meat_Stats_Slaughter_Counts.csv")
slaughter_df

Unnamed: 0,Date,Year,Month,Unit,Animal,Count,Commercial_Or_Federally_Inspected,Type_Of_Meat
0,Dec-2023,2023,12,"1,000 Head",Cattle,2593.40,Commercial,Red Meat
1,Nov-2023,2023,11,"1,000 Head",Cattle,2729.90,Commercial,Red Meat
2,Oct-2023,2023,10,"1,000 Head",Cattle,2826.00,Commercial,Red Meat
3,Sep-2023,2023,9,"1,000 Head",Cattle,2610.60,Commercial,Red Meat
4,Aug-2023,2023,8,"1,000 Head",Cattle,2890.00,Commercial,Red Meat
...,...,...,...,...,...,...,...,...
15278,Apr-1983,1983,4,"1,000 Head",Turkeys,10563.00,Federally Inspected,Poultry
15279,Mar-1983,1983,3,"1,000 Head",Turkeys,11979.00,Federally Inspected,Poultry
15280,Feb-1983,1983,2,"1,000 Head",Turkeys,8477.00,Federally Inspected,Poultry
15281,Jan-1983,1983,1,"1,000 Head",Turkeys,8532.00,Federally Inspected,Poultry


In [495]:
# Fix the dates with the same helper used for the other datasets; it adds
# DateTime as the first column and drops Date, Year and Month
slaughter_df = fix_datetime(slaughter_df)

# Convert unit: Count is a string with thousands separators, reported in units of 1,000 head
slaughter_df = slaughter_df.drop(columns=['Unit'])
slaughter_df['Count'] = slaughter_df['Count'].str.replace(',', '').astype(float) * 1e3

In [None]:
# NOTE(review): disabled cell. As written it would not run against slaughter_df:
# it selects 'Production' and 'Commercial or Federally Inspected', while the
# slaughter data loaded above has 'Count' and 'Commercial_Or_Federally_Inspected'.
# TODO: correct the column names and enable this cell, or remove it.
# slaughter_df = slaughter_df[['DateTime', 'Animal', 'Production', 'Commercial or Federally Inspected', 'Type_Of_Meat']]
# slaughter_df = slaughter_df.rename(columns={'Type_Of_Meat': TYPE_OF_MEAT})
# slaughter_df