## TRADE DATA

In [26]:
import pandas as pd
import datetime

def process_csv(filepath, product_index):
    """
    This function takes the file path and product index as inputs and returns the filtered trade data dataframe
    for a particular product.
    
    Parameters:
    filepath (str) : file path for the trade data file
    product_index (int) : index of the product in the list of all products
    
    Returns:
    df_product_filtered (pandas dataframe) : dataframe for the filtered product
    """
    
    # Read the CSV file and convert the "TIME_PERIOD" column to datetime
    trade_data = pd.read_csv(filepath)
    trade_data["TIME_PERIOD"] = pd.to_datetime(trade_data["TIME_PERIOD"], format = '%Y-%m').dt.date
    
    # Rename columns and remove duplicates
    trade_data = trade_data.rename(columns={"partner": "PARTNER_codes", "declarant": "DECLARANT_codes"})
    trade_data = trade_data.drop_duplicates()
    
    # Get the list of all products
    products = trade_data["product"].drop_duplicates().to_list()
    
    # Filter the data for a particular product
    product_filter = products[product_index]
    
    # Read the partner countries data
    partner_countries = pd.read_csv("trade_data/partners.csv", sep = ";")
    
    # Merge the trade data and partner country data on the "PARTNER_codes" column
    trade_data = pd.merge(trade_data, partner_countries, on = ['PARTNER_codes'], how = 'inner')
    
    # Pivot the trade data to create a multi-index dataframe
    trade_data_pivot = trade_data.pivot(index=['PARTNER_Labels', 'TIME_PERIOD', 'product'], 
                                  columns='indicators', 
                                  values=['OBS_VALUE'])
    
    # Flatten the column names
    trade_data_pivot.columns = trade_data_pivot.columns.map('_'.join)
    
    # Reset the index and remove missing values
    trade_data_pivot = trade_data_pivot.reset_index()
    trade_data_pivot = trade_data_pivot.dropna()
    
    # Filter the data for the selected product
    df = trade_data_pivot
    df_product_filtered = df[df['product'] == product_filter]

    
    return df_product_filtered

#Declare DataFrames 
df_maize_corn_0 = process_csv("trade_data/maize_corn.csv", 0)
df_maize_corn_0['price'] = df_maize_corn_0['OBS_VALUE_VALUE_1000EURO'] / df_maize_corn_0['OBS_VALUE_QUANTITY_TON']

# Keep '10059000' the same but name it 'maize'
df_maize = df_maize_corn_0
df_maize['product'] = 'maize'


## WEATHER DATA

In [33]:
import pandas as pd
import datetime

#Clean import oil_price_index_2015_oecd

# Read the data from the weather file
df_global_temperature = pd.read_csv('weather_data/global_temperature_data.csv')

# Rename the 'YearMonth' column to 'TIME_PERIOD'
df_global_temperature.rename(columns={'yearmonth': 'TIME_PERIOD', 'partner_labels': 'PARTNER_Labels'}, inplace=True)

# Convert the TIME_PERIOD column to datetime format
df_global_temperature['TIME_PERIOD'] = pd.to_datetime(df_global_temperature['TIME_PERIOD'], format='%Y%m')

# Convert the TIME_PERIOD column to the desired format
df_global_temperature['TIME_PERIOD'] = df_global_temperature['TIME_PERIOD'].dt.strftime('%Y-%m-%d')

# Drop columns
df_global_temperature.drop(columns=['partner_code', 'weather_countrycode', 'weather_countryname', 'iso_countrycode'], inplace=True)

df_global_temperature['TIME_PERIOD'] = pd.to_datetime(df_global_temperature['TIME_PERIOD'])
df_global_temperature

Unnamed: 0,PARTNER_Labels,TIME_PERIOD,AVG_TAVG,MIN_TMIN,MAX_TMAX
0,United Arab Emirates,2005-01-01,186.096154,110.000000,280.000000
1,United Arab Emirates,2005-02-01,204.750000,106.000000,306.500000
2,United Arab Emirates,2005-03-01,238.226667,124.500000,363.666667
3,United Arab Emirates,2005-04-01,280.984568,155.000000,423.333333
4,United Arab Emirates,2005-05-01,321.296296,192.000000,435.500000
...,...,...,...,...,...
21085,Viet Nam,2022-08-01,287.060676,244.363636,348.666667
21086,Viet Nam,2022-09-01,279.751352,233.600000,350.000000
21087,Viet Nam,2022-10-01,246.000000,238.000000,260.000000
21088,Viet Nam,2022-11-01,272.000000,252.000000,304.000000


## OIL DATA

In [34]:
import pandas as pd
import datetime

#Clean import oil_price_index_2015_oecd

# Read the data from the 'oil_price_index_2015_oecd.xlsx' file
df_oil_index = pd.read_excel('oil_prices/oil_price_index_2015_oecd.xlsx', skiprows=[0,1,2,3,4, 9012, 9013, 9014, 9015])
# Forward fill the 'Country' column
df_oil_index['Country'].fillna(method='ffill', inplace=True)
# Rename columns
df_oil_index.rename(columns={'Unnamed: 3': 'CPI_ENERGY'}, inplace=True)
df_oil_index.rename(columns={'Unnamed: 4': 'CPI_ENERGY_HARMONISED'}, inplace=True)
# Delete the 'Unnamed: 2' column
df_oil_index.drop(columns=['Unnamed: 2'], inplace=True)
# Convert the 'Time' column to datetime format
df_oil_index['Time'] = pd.to_datetime(df_oil_index['Time'], format='%b-%Y')
df_oil_index.rename(columns={'Time': 'TIME_INDEX'}, inplace=True)

#Clean import CMO-Historical-Data-Monthly.xlsx

df_commodities_price = pd.read_excel('oil_prices/CMO-Historical-Data-Monthly.xlsx', sheet_name = 'Monthly Prices', skiprows=[0,1,2,3,4,5])
# Rename columns
df_commodities_price.rename(columns={'Unnamed: 0': 'TIME_PERIOD'}, inplace=True)
df_commodities_price.rename(columns={'CRUDE_PETRO': 'OIL_PRICE $/bbl'}, inplace=True)
df_commodities_price['TIME_PERIOD'] = pd.to_datetime(df_commodities_price['TIME_PERIOD'], format='%YM%m')

df_oil_price_full = pd.DataFrame(df_commodities_price, columns=['TIME_PERIOD', 'OIL_PRICE $/bbl'])

df_oil_price = df_oil_price_full.loc[(df_oil_price_full['TIME_PERIOD'] >= '2005-01-01') & (df_oil_price_full['TIME_PERIOD'] <= '2022-10-01')]
df_oil_price

Unnamed: 0,TIME_PERIOD,OIL_PRICE $/bbl
540,2005-01-01,42.972278
541,2005-02-01,44.818211
542,2005-03-01,50.942879
543,2005-04-01,50.640476
544,2005-05-01,47.826573
...,...,...
749,2022-06-01,116.800000
750,2022-07-01,105.083333
751,2022-08-01,95.973333
752,2022-09-01,88.220000


## FUTURES PRICES (CORN)

In [65]:
import pandas as pd
import datetime

#Clean import oil_price_index_2015_oecd

# Read the data from the weather file
df_futures_corn = pd.read_csv('futures_data/US Corn Futures Historical Data.csv')

# Rename the 'date' column to 'TIME_PERIOD'
df_futures_corn.rename(columns={'Date': 'TIME_PERIOD', 'Price': 'Corn Price Futures'}, inplace=True)

df_futures_corn['TIME_PERIOD'] = pd.to_datetime(df_futures_corn['TIME_PERIOD'], format='%d/%m/%Y')
df_futures_corn['TIME_PERIOD'] = df_futures_corn['TIME_PERIOD'].dt.strftime('%Y/%d/%m')

df_futures_corn.drop(columns=['Open','High','Low','Vol.','Change %'], inplace=True)

df_futures_corn['TIME_PERIOD'] = pd.to_datetime(df_futures_corn['TIME_PERIOD'])

df_futures_corn.sort_values(by='TIME_PERIOD', ascending=True, inplace=True)
df_futures_corn

Unnamed: 0,TIME_PERIOD,Corn Price Futures
212,2005-02-01,222.75
211,2005-03-01,221.00
210,2005-04-01,213.50
209,2005-05-01,230.75
208,2005-06-01,222.25
...,...,...
4,2022-06-01,743.75
3,2022-07-01,616.25
2,2022-08-01,673.75
1,2022-09-01,677.50


## Merged trade data, oil price

In [66]:
df_maize['TIME_PERIOD'] = pd.to_datetime(df_maize['TIME_PERIOD'])
df_merged_trade_oil = df_maize.merge(df_oil_price, on='TIME_PERIOD', how='left')
df_merged_trade_oil

Unnamed: 0,PARTNER_Labels,TIME_PERIOD,product,OBS_VALUE_QUANTITY_TON,OBS_VALUE_VALUE_1000EURO,price,OIL_PRICE $/bbl
0,Afghanistan,2021-03-01,maize,0.0,0.04,inf,63.830000
1,Antigua and Barbuda,2020-01-01,maize,54.0,21.20,0.392593,61.626667
2,Antigua and Barbuda,2020-04-01,maize,27.0,12.14,0.449630,21.043333
3,Argentina,2005-01-01,maize,8100.0,937.50,0.115741,42.972278
4,Argentina,2005-02-01,maize,3172.0,392.55,0.123755,44.818211
...,...,...,...,...,...,...,...
5534,Zimbabwe,2007-01-01,maize,0.1,0.13,1.300000,53.516970
5535,Zimbabwe,2010-12-01,maize,0.3,1.11,3.700000,90.005962
5536,Zimbabwe,2011-01-01,maize,0.1,0.38,3.800000,92.690595
5537,Zimbabwe,2011-03-01,maize,0.2,0.38,1.900000,108.645217


## Merged trade data, oil price, weather data

In [67]:
df_merged_trade_oil_weather = df_merged_trade_oil.merge(df_global_temperature, on=['TIME_PERIOD', 'PARTNER_Labels'], how='left')
df_merged_trade_oil_weather

Unnamed: 0,PARTNER_Labels,TIME_PERIOD,product,OBS_VALUE_QUANTITY_TON,OBS_VALUE_VALUE_1000EURO,price,OIL_PRICE $/bbl,AVG_TAVG,MIN_TMIN,MAX_TMAX
0,Afghanistan,2021-03-01,maize,0.0,0.04,inf,63.830000,,,
1,Antigua and Barbuda,2020-01-01,maize,54.0,21.20,0.392593,61.626667,,,
2,Antigua and Barbuda,2020-04-01,maize,27.0,12.14,0.449630,21.043333,,,
3,Argentina,2005-01-01,maize,8100.0,937.50,0.115741,42.972278,,,
4,Argentina,2005-02-01,maize,3172.0,392.55,0.123755,44.818211,,,
...,...,...,...,...,...,...,...,...,...,...
5534,Zimbabwe,2007-01-01,maize,0.1,0.13,1.300000,53.516970,,,
5535,Zimbabwe,2010-12-01,maize,0.3,1.11,3.700000,90.005962,,,
5536,Zimbabwe,2011-01-01,maize,0.1,0.38,3.800000,92.690595,,,
5537,Zimbabwe,2011-03-01,maize,0.2,0.38,1.900000,108.645217,,,


## Merged trade data, oil price, weather, future prices data

In [68]:
df_merged = df_merged_trade_oil_weather.merge(df_futures_corn, on='TIME_PERIOD', how='left')
df_merged

Unnamed: 0,PARTNER_Labels,TIME_PERIOD,product,OBS_VALUE_QUANTITY_TON,OBS_VALUE_VALUE_1000EURO,price,OIL_PRICE $/bbl,AVG_TAVG,MIN_TMIN,MAX_TMAX,Corn Price Futures
0,Afghanistan,2021-03-01,maize,0.0,0.04,inf,63.830000,,,,564.25
1,Antigua and Barbuda,2020-01-01,maize,54.0,21.20,0.392593,61.626667,,,,381.25
2,Antigua and Barbuda,2020-04-01,maize,27.0,12.14,0.449630,21.043333,,,,311.50
3,Argentina,2005-01-01,maize,8100.0,937.50,0.115741,42.972278,,,,
4,Argentina,2005-02-01,maize,3172.0,392.55,0.123755,44.818211,,,,222.75
...,...,...,...,...,...,...,...,...,...,...,...
5534,Zimbabwe,2007-01-01,maize,0.1,0.13,1.300000,53.516970,,,,415.75
5535,Zimbabwe,2010-12-01,maize,0.3,1.11,3.700000,90.005962,,,,636.50
5536,Zimbabwe,2011-01-01,maize,0.1,0.38,3.800000,92.690595,,,,670.00
5537,Zimbabwe,2011-03-01,maize,0.2,0.38,1.900000,108.645217,,,,701.00


## End result for now

In [69]:
###FOR NOW
df_maize_corn_trade_weather_futures = df_merged.pivot_table(index='TIME_PERIOD', columns=['PARTNER_Labels'], values=['OBS_VALUE_QUANTITY_TON', 'OBS_VALUE_VALUE_1000EURO', 'price', 'OIL_PRICE $/bbl', 'AVG_TAVG', 'MIN_TMIN', 'MAX_TMAX', 'Corn Price Futures'])
df_maize_corn_trade_weather_futures

Unnamed: 0_level_0,AVG_TAVG,AVG_TAVG,AVG_TAVG,AVG_TAVG,AVG_TAVG,AVG_TAVG,AVG_TAVG,AVG_TAVG,AVG_TAVG,AVG_TAVG,...,price,price,price,price,price,price,price,price,price,price
PARTNER_Labels,Australia,Austria,Belgium,Bosnia and Herzegovina,Brazil,Bulgaria,Cameroon,Canada,Chile,China,...,Türkiye,Uganda,Ukraine,United Arab Emirates,United Kingdom,United States,Uruguay,Viet Nam,Zambia,Zimbabwe
TIME_PERIOD,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2005-01-01,,,30.700000,,,,,,,,...,0.710000,,,,0.314011,0.334963,,,,3.080
2005-02-01,,,20.000000,,266.743474,,,,,,...,,,,,0.380808,0.270379,,,,2.550
2005-03-01,,-1.994624,55.800000,,,,,,,,...,,,,,0.329100,0.274034,,,,1.900
2005-04-01,,60.233333,111.000000,,,,,,,,...,,,inf,,0.292759,0.365034,,,,0.775
2005-05-01,,108.758065,129.250000,,,,,,,,...,,,,,0.336029,0.298552,,,,inf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-06-01,137.954664,182.161522,180.681818,,215.582738,219.847222,,141.197971,,222.902520,...,0.424940,,0.383358,,0.460000,0.424965,,,,
2022-07-01,,177.245503,190.000000,,227.424074,245.580645,,176.170636,,,...,0.350000,,0.359280,,0.480000,0.424000,,2.15,,
2022-08-01,,175.258849,213.200000,,222.129167,243.446237,,179.322868,,234.647872,...,0.300000,,0.357720,,0.430072,0.430002,,,,
2022-09-01,,103.779082,166.000000,,221.270833,180.200000,,131.132246,,,...,0.428571,inf,0.313625,,0.480000,0.430013,,,,


## Save excel file

In [70]:
# Set the path and name of the Excel file
file_path = "df_maize_trade_oil_weather_futures.xlsx"

# Write the df_pivot dataframe to the specified Excel file
df_maize_corn_trade_weather_futures.to_excel(file_path, index=True)

## Simplicity/overview: filter top 5 countries

In [71]:
# Filter the data to include only the desired countries
df_filtered = df_merged[df_merged['PARTNER_Labels'].isin(['Ukraine', 'France', 'Brazil', 'Germany', 'Romania'])]

# Create the pivot table
df_filtered_maize_corn_trade_weather_futures = df_filtered.pivot_table(index='TIME_PERIOD', columns=['PARTNER_Labels'], values=['OBS_VALUE_QUANTITY_TON', 'OBS_VALUE_VALUE_1000EURO', 'price', 'OIL_PRICE $/bbl', 'AVG_TAVG', 'MIN_TMIN', 'MAX_TMAX', 'Corn Price Futures'])
df_filtered_maize_corn_trade_weather_futures

Unnamed: 0_level_0,AVG_TAVG,AVG_TAVG,AVG_TAVG,AVG_TAVG,AVG_TAVG,Corn Price Futures,Corn Price Futures,Corn Price Futures,Corn Price Futures,Corn Price Futures,...,OIL_PRICE $/bbl,OIL_PRICE $/bbl,OIL_PRICE $/bbl,OIL_PRICE $/bbl,OIL_PRICE $/bbl,price,price,price,price,price
PARTNER_Labels,Brazil,France,Germany,Romania,Ukraine,Brazil,France,Germany,Romania,Ukraine,...,Brazil,France,Germany,Romania,Ukraine,Brazil,France,Germany,Romania,Ukraine
TIME_PERIOD,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2005-01-01,,50.081198,28.651592,,,,,,,,...,,42.972278,42.972278,,,,0.122632,0.120429,,
2005-02-01,266.743474,30.689158,-1.962406,,,222.75,222.75,222.75,,,...,44.818211,44.818211,44.818211,,,0.164250,0.124028,0.109035,,
2005-03-01,,76.202630,45.730051,,,,221.00,221.00,,,...,,50.942879,50.942879,,,,0.123698,0.118774,,
2005-04-01,,108.081105,101.859649,,90.622214,,213.50,213.50,,213.50,...,,50.640476,50.640476,,50.640476,,0.117439,0.109957,,inf
2005-05-01,,147.068032,134.667233,,,,230.75,230.75,,,...,,47.826573,47.826573,,,,0.123325,0.116828,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-06-01,215.582738,197.346344,188.737990,200.784686,209.451222,743.75,743.75,743.75,743.75,743.75,...,116.800000,116.800000,116.800000,116.800000,116.800000,0.400000,0.277599,0.549137,0.330063,0.383358
2022-07-01,227.424074,220.948883,198.723435,220.287174,212.861598,616.25,616.25,616.25,616.25,616.25,...,105.083333,105.083333,105.083333,105.083333,105.083333,0.351755,0.361186,0.383719,0.621907,0.359280
2022-08-01,222.129167,221.881986,208.800878,221.240503,224.523306,673.75,673.75,673.75,673.75,673.75,...,95.973333,95.973333,95.973333,95.973333,95.973333,0.211752,0.289791,0.340837,0.540233,0.357720
2022-09-01,221.270833,170.650565,139.969363,159.536429,136.176826,677.50,677.50,677.50,677.50,677.50,...,88.220000,88.220000,88.220000,88.220000,88.220000,0.296534,0.435231,0.155384,0.334250,0.313625


In [72]:
df_filtered_maize_corn_trade_weather_futures.to_excel("df_filtered_maize_trade_oil_weather_futures.xlsx", index=True)