Data Analysis

In [212]:
import warnings
warnings.filterwarnings("ignore")

import _pickle as cPickle
import pandas as pd
import numpy as np

In [213]:
cmdty_entire_df = pd.read_csv('commodities_data.csv')
cmdty_entire_df['Date'] = pd.to_datetime(cmdty_entire_df['Date'])
cmdty_entire_df['YearMonth'] = cmdty_entire_df['Date'].dt.to_period('M')
cmdty_entire_df.head()

Unnamed: 0,Commodity,Contract,Date,PX_LAST,YearMonth
0,Barley,1,1991-03-25,206.23,1991-03
1,Barley,1,1991-03-26,206.02,1991-03
2,Barley,1,1991-03-27,205.17,1991-03
3,Barley,1,1991-03-28,205.17,1991-03
4,Barley,1,1991-04-01,207.29,1991-04


In [214]:
len((cmdty_entire_df['Commodity']).unique())

30

In [215]:
cmdty_entire_df.sort_values(by=['Commodity','Date'])
cmdty_entire_df.fillna(method='ffill', inplace=True)
cmdty_entire_df.head()

Unnamed: 0,Commodity,Contract,Date,PX_LAST,YearMonth
0,Barley,1,1991-03-25,206.23,1991-03
1,Barley,1,1991-03-26,206.02,1991-03
2,Barley,1,1991-03-27,205.17,1991-03
3,Barley,1,1991-03-28,205.17,1991-03
4,Barley,1,1991-04-01,207.29,1991-04


In [216]:
cmdty_cntrct_2_df = cmdty_entire_df[cmdty_entire_df['Contract']==2]
cmdty_cntrct_2_df = cmdty_cntrct_2_df.set_index('Date')
cmdty_cntrct_2_df.head()

Unnamed: 0_level_0,Commodity,Contract,PX_LAST,YearMonth
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1991-03-25,Barley,2,182.45,1991-03
1991-03-26,Barley,2,181.86,1991-03
1991-03-27,Barley,2,183.44,1991-03
1991-03-28,Barley,2,187.38,1991-03
1991-04-01,Barley,2,188.96,1991-04


## Compute Number of Observations

In [217]:
monthly_observations = cmdty_entire_df.groupby(['Commodity', 'YearMonth']).size().reset_index(name='Observations')
cmdty_obs_count = cmdty_entire_df.groupby('Commodity')['YearMonth'].nunique().reset_index(name='N')
cmdty_obs_count.head()

Unnamed: 0,Commodity,N
0,Aluminium,116
1,Barley,214
2,Broilers,103
3,Canola,324
4,Coal,3


## Returns, Volatility, and Sharpe Calculations

In [218]:
# Get price for Max Date for Each Commodity for Each Month for Contract 2
cmdty_cntrct_2_df = cmdty_cntrct_2_df.reset_index()
max_date_px_last_cntrct_2 = cmdty_cntrct_2_df.groupby(['Commodity', 'YearMonth']).apply(
    lambda x: x.loc[x['Date'].idxmax(), ['Date', 'PX_LAST']]).reset_index()
max_date_px_last_cntrct_2.sort_values(by=['Commodity','YearMonth'], inplace =True)
max_date_px_last_cntrct_2.set_index('Date', inplace=True)
max_date_px_last_cntrct_2.head()

Unnamed: 0_level_0,Commodity,YearMonth,PX_LAST
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1999-05-28,Aluminium,1999-05,75.68
1999-06-30,Aluminium,1999-06,82.36
1999-07-30,Aluminium,1999-07,82.42
1999-08-31,Aluminium,1999-08,85.56
1999-09-30,Aluminium,1999-09,87.55


In [219]:
max_date_px_last_cntrct_2[max_date_px_last_cntrct_2['Commodity'] == 'Broilers']

Unnamed: 0_level_0,Commodity,YearMonth,PX_LAST
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


In [220]:
# Pivoted prices for Contract 2
max_date_px_last_cntrct_2_pivot = max_date_px_last_cntrct_2.pivot_table(index = 'Date', columns = 'Commodity', values = 'PX_LAST')
max_date_px_last_cntrct_2_pivot

Commodity,Aluminium,Barley,Canola,Cocoa,Coffee,Copper,Corn,Cotton,Crude Oil,Feeder cattle,...,Orange juice,Palladium,Platinum,Propane,Rough rice,Silver,Soybean meal,Soybeans,Unleaded gas,Wheat
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1970-01-30,,,,2527.0,,,2061.8750,61.78,,,...,209.01,,,,,,107.09,654.3750,,976.4375
1970-02-27,,,,2194.0,,,2070.2500,61.68,,,...,162.52,,,,,,110.16,661.3750,,1018.8125
1970-03-31,,,,2163.0,,,2066.1250,61.42,,,...,162.11,,,,,,107.04,660.3750,,1003.1875
1970-04-30,,,,2095.0,,,2157.9375,59.58,,,...,172.42,,,,,,111.13,672.3125,,1044.5000
1970-05-28,,,,1981.0,,,2145.3125,60.72,,,...,153.26,,,,,,109.25,673.1875,,991.8750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2008-08-29,128.26,248.88,569.19,2875.0,152.87,339.75,623.0000,74.54,126.21,110.729,...,121.84,312.94,1513.65,176.47,19.688,13.838,363.08,1359.1250,,845.1250
2008-09-30,114.18,196.60,449.92,2569.0,137.93,290.36,522.4375,61.15,109.72,104.152,...,97.81,206.76,1038.00,146.04,19.621,12.394,289.04,1076.1875,,718.1250
2008-10-31,95.87,187.78,429.60,2070.0,120.48,184.78,432.4375,48.06,74.47,98.525,...,84.45,203.68,837.60,94.93,15.595,9.754,275.49,945.2500,,570.1250
2008-11-28,82.85,155.10,409.90,2281.0,118.60,166.10,377.2500,47.86,59.51,92.225,...,78.80,196.25,888.30,73.88,13.480,10.245,255.40,891.0000,,574.7500


In [221]:
cmdty_cntrct_2_rets_df = max_date_px_last_cntrct_2_pivot.pct_change()

In [222]:
def Performance_Metrics(df_asset_returns, annualizing_period = 12):
    """
    Input:DataFrame of Returns/Excess Returns
    Output:DataFrame for Summary Statistics: 
                    1. Annualized Mean, 
                    2. Annualized Vol, 
                    3. Annualized Sharpe Ratio
    """
    mean_returns = df_asset_returns.mean() * annualizing_period * 100
    std_returns = df_asset_returns.std() * np.sqrt(annualizing_period) * 100
    sharpe_ratio = mean_returns/std_returns
    performance_metrics = pd.DataFrame({"Annualized Mean Returns": mean_returns, "Annualized Vol": std_returns, "Annualized Sharpe Ratio" : sharpe_ratio})
    return performance_metrics

In [223]:
cmdty_cntrct_2_perf_metrics_df = Performance_Metrics(cmdty_cntrct_2_rets_df)
cmdty_cntrct_2_perf_metrics_df.head()

Unnamed: 0_level_0,Annualized Mean Returns,Annualized Vol,Annualized Sharpe Ratio
Commodity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aluminium,1.211298,18.460714,0.065615
Barley,0.991459,18.742002,0.0529
Canola,-0.728509,19.396765,-0.037558
Cocoa,4.529431,30.219851,0.149883
Coffee,6.466619,35.995597,0.17965


In [224]:
table_1 = pd.merge(cmdty_obs_count, cmdty_cntrct_2_perf_metrics_df, how = 'left',left_on='Commodity', right_on='Commodity')
table_1

Unnamed: 0,Commodity,N,Annualized Mean Returns,Annualized Vol,Annualized Sharpe Ratio
0,Aluminium,116,1.211298,18.460714,0.065615
1,Barley,214,0.991459,18.742002,0.0529
2,Broilers,103,,,
3,Canola,324,-0.728509,19.396765,-0.037558
4,Coal,3,,,
5,Cocoa,468,4.529431,30.219851,0.149883
6,Coffee,437,6.466619,35.995597,0.17965
7,Copper,241,8.956501,24.984615,0.358481
8,Corn,468,-1.097886,24.113173,-0.045531
9,Cotton,468,2.073381,23.198005,0.089378


## Basis Calculation

In [225]:
max_contract_px_last = cmdty_entire_df.groupby(['Commodity','YearMonth']).apply(
    lambda x: x.loc[x['Contract'].idxmax()]).reset_index(drop=True)
max_contract_px_last = max_contract_px_last[max_contract_px_last['Contract'] > 2]
max_contract_px_last.sort_values(by=['Commodity', 'YearMonth'], inplace=True)
max_contract_px_last.set_index('Date', inplace=True)
max_contract_px_last

Unnamed: 0_level_0,Commodity,Contract,PX_LAST,YearMonth
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1999-05-18,Aluminium,4,33.33,1999-05
1999-06-01,Aluminium,4,31.65,1999-06
1999-07-01,Aluminium,4,34.25,1999-07
1999-08-02,Aluminium,4,33.93,1999-08
1999-09-01,Aluminium,4,36.18,1999-09
...,...,...,...,...
2008-08-01,Wheat,10,904.00,2008-08
2008-09-02,Wheat,11,888.75,2008-09
2008-10-01,Wheat,11,787.75,2008-10
2008-11-03,Wheat,11,683.75,2008-11


In [226]:
# max_contract_px_last_pivot = max_contract_px_last.pivot_table(index = 'Date', columns = ['Commodity','Contract'], values = 'PX_LAST')
# max_contract_px_last_pivot.fillna(method='ffill', inplace=True)
# max_contract_px_last_pivot

In [232]:
max_date_px_last_cntrct_2.reset_index(inplace=True)
max_contract_px_last.reset_index(inplace=True)
cmdtry_cntrct_2_and_latest_df = pd.merge(max_date_px_last_cntrct_2, max_contract_px_last, how='left', left_on=['Commodity', 'YearMonth'], right_on=['Commodity', 'YearMonth'])
cmdtry_cntrct_2_and_latest_df

Unnamed: 0,Date_x,Commodity,YearMonth,PX_LAST_x,index,Date_y,Contract,PX_LAST_y
0,1999-05-28,Aluminium,1999-05,75.680,0.0,1999-05-18,4.0,33.33
1,1999-06-30,Aluminium,1999-06,82.360,1.0,1999-06-01,4.0,31.65
2,1999-07-30,Aluminium,1999-07,82.420,2.0,1999-07-01,4.0,34.25
3,1999-08-31,Aluminium,1999-08,85.560,3.0,1999-08-02,4.0,33.93
4,1999-09-30,Aluminium,1999-09,87.550,4.0,1999-09-01,4.0,36.18
...,...,...,...,...,...,...,...,...
9447,2008-08-29,Wheat,2008-08,845.125,9309.0,2008-08-01,10.0,904.00
9448,2008-09-30,Wheat,2008-09,718.125,9310.0,2008-09-02,11.0,888.75
9449,2008-10-31,Wheat,2008-10,570.125,9311.0,2008-10-01,11.0,787.75
9450,2008-11-28,Wheat,2008-11,574.750,9312.0,2008-11-03,11.0,683.75


In [233]:
cmdtry_cntrct_2_and_latest_df['Contract_Base_Num'] = 2
cmdtry_cntrct_2_and_latest_df

Unnamed: 0,Date_x,Commodity,YearMonth,PX_LAST_x,index,Date_y,Contract,PX_LAST_y,Contract_Base_Num
0,1999-05-28,Aluminium,1999-05,75.680,0.0,1999-05-18,4.0,33.33,2
1,1999-06-30,Aluminium,1999-06,82.360,1.0,1999-06-01,4.0,31.65,2
2,1999-07-30,Aluminium,1999-07,82.420,2.0,1999-07-01,4.0,34.25,2
3,1999-08-31,Aluminium,1999-08,85.560,3.0,1999-08-02,4.0,33.93,2
4,1999-09-30,Aluminium,1999-09,87.550,4.0,1999-09-01,4.0,36.18,2
...,...,...,...,...,...,...,...,...,...
9447,2008-08-29,Wheat,2008-08,845.125,9309.0,2008-08-01,10.0,904.00,2
9448,2008-09-30,Wheat,2008-09,718.125,9310.0,2008-09-02,11.0,888.75,2
9449,2008-10-31,Wheat,2008-10,570.125,9311.0,2008-10-01,11.0,787.75,2
9450,2008-11-28,Wheat,2008-11,574.750,9312.0,2008-11-03,11.0,683.75,2


In [228]:
print(len(max_date_px_last_cntrct_2),len(max_contract_px_last))

9452 9314
