In [3]:
import pandas as pd
import datetime

def process_csv(filepath, product_index):
    """
    This function takes the file path and product index as inputs and returns the filtered trade data dataframe
    for a particular product.
    
    Parameters:
    filepath (str) : file path for the trade data file
    product_index (int) : index of the product in the list of all products
    
    Returns:
    df_product_filtered (pandas dataframe) : dataframe for the filtered product
    """
    
    # Read the CSV file and convert the "TIME_PERIOD" column to datetime
    trade_data = pd.read_csv(filepath)
    trade_data["TIME_PERIOD"] = pd.to_datetime(trade_data["TIME_PERIOD"], format = '%Y-%m').dt.date
    
    # Rename columns and remove duplicates
    trade_data = trade_data.rename(columns={"partner": "PARTNER_codes", "declarant": "DECLARANT_codes"})
    trade_data = trade_data.drop_duplicates()
    
    # Get the list of all products
    products = trade_data["product"].drop_duplicates().to_list()
    
    # Filter the data for a particular product
    product_filter = products[product_index]
    
    # Read the partner countries data
    partner_countries = pd.read_csv("trade_data/partners.csv", sep = ";")
    
    # Merge the trade data and partner country data on the "PARTNER_codes" column
    trade_data = pd.merge(trade_data, partner_countries, on = ['PARTNER_codes'], how = 'inner')
    
    # Pivot the trade data to create a multi-index dataframe
    trade_data_pivot = trade_data.pivot(index=['PARTNER_Labels', 'TIME_PERIOD', 'product'], 
                                  columns='indicators', 
                                  values=['OBS_VALUE'])
    
    # Flatten the column names
    trade_data_pivot.columns = trade_data_pivot.columns.map('_'.join)
    
    # Reset the index and remove missing values
    trade_data_pivot = trade_data_pivot.reset_index()
    trade_data_pivot = trade_data_pivot.dropna()
    
    # Filter the data for the selected product
    df = trade_data_pivot
    df_product_filtered = df[df['product'] == product_filter]
    
    # group by PARTNER_Labels and TIME_PERIOD and aggregate the OBS_VALUE_QUANTITY_TON and OBS_VALUE_VALUE_1000EURO columns by summing
    grouped_df = df_product_filtered.groupby(['PARTNER_Labels', 'TIME_PERIOD']).agg({'OBS_VALUE_QUANTITY_TON': 'sum', 'OBS_VALUE_VALUE_1000EURO': 'sum'})
    
    # Filter the grouped data for the selected product and calculate globalprice
    grouped_df = grouped_df.reset_index()
    grouped_df = grouped_df[grouped_df['product'] == product_filter]
    grouped_df['globalprice'] = grouped_df['OBS_VALUE_VALUE_1000EURO'] / grouped_df['OBS_VALUE_QUANTITY_TON']
    
    # Merge the filtered product data and globalprice data on PARTNER_Labels and TIME_PERIOD columns
    df_product_filtered = pd.merge(df_product_filtered, grouped_df[['PARTNER_Labels', 'TIME_PERIOD', 'globalprice']], on=['PARTNER_Labels', 'TIME_PERIOD'], how='inner')

    return df_product_filtered

#Declare DataFrames 
df_maize_corn1 = process_csv("trade_data/maize_corn.csv", 0)
df_maize_corn1.head(500)


KeyError: 'product'