In [1]:
import numpy as np
import pandas as pd
import plotly.express as px

# Lithium

In [2]:
# Load data source #1 - Lithium prices in USD per kilogram
df1 = pd.read_csv('/Users/michal/Documents/Code/metals/data/Lithium_prices_2017-01-01_to_2021-12-31_merged.csv')
df1['Date'] = pd.to_datetime(df1['Date'])
df1['Price'] = df1['Price'].astype(float)
df1 = df1.drop(columns=['Unit'])
df1.head()

Unnamed: 0,Date,Price
0,2017-05-10,20.92
1,2017-05-11,20.92
2,2017-05-12,20.92
3,2017-05-15,20.92
4,2017-05-16,20.92


In [3]:
# Load data source #2 - South America LOB
df2 = pd.read_csv('/Users/michal/Documents/Code/metals/bloomberg_data/lithium_SouthAmerica_LOB_2017m.csv', sep=';')
df2.head()
df2['Date'] = pd.to_datetime(df2['Date'])
df2['Price'] = df2['Price'].astype(float)
df2['Price'] = df2['Price']/1000 # WARNING: DIVIDING BY 1000
df2.head()

  df2['Date'] = pd.to_datetime(df2['Date'])


Unnamed: 0,Date,Price
0,2016-12-31,10.0
1,2017-01-31,10.0
2,2017-02-28,10.25
3,2017-03-31,10.25
4,2017-04-30,10.5


In [4]:
# Load data source #3 - COMEX Lithium Hydroxide future
df3 = pd.read_csv('/Users/michal/Documents/Code/metals/reuters_data/COMEX Lithium Hydroxide CIF CJK (Fastmarkets) Electronic Commodity Future Continuation 1.csv', sep=';')
df3 = df3.rename(columns={'Exchange Date': 'Date'})
df3 = df3.rename(columns={'Close' : 'Price'})
df3['Date'] = pd.to_datetime(df3['Date'], format='%d-%b-%Y')
df3 = df3.iloc[:, :4] # Keep only the first 4 columns
df3.head()

Unnamed: 0,Date,Price,Net,%Chg
0,2021-05-03,14,,
1,2021-05-04,14,0.0,0%
2,2021-05-05,14,0.0,-3%
3,2021-05-06,14,0.0,0%
4,2021-05-07,14,0.0,0%


In [5]:
# Load Lithium data source #4 - Lithium Americas Corp
df4 = pd.read_csv('/Users/michal/Documents/Code/metals/reuters_data/Lithium Americas Corp.csv', sep=';')
df4 = df4.rename(columns={'Exchange Date': 'Date'})
df4 = df4.rename(columns={'Close' : 'Price'})
df4['Date'] = pd.to_datetime(df4['Date'], format='%d-%b-%Y')
df4 = df4.iloc[:, :2] # Keep only the first 2 columns
df4['Price'] = df4['Price'].str.replace(',', '.').astype(float)
df4.head()

Unnamed: 0,Date,Price
0,2025-05-13,3.04
1,2025-05-12,3.15
2,2025-05-09,3.16
3,2025-05-08,3.14
4,2025-05-07,3.07


In [31]:
# Load Lithium data source #5 - East Asia Lithium Carbonate 99.5% Swap
df5 = pd.read_csv('/Users/michal/Documents/Code/metals/bloomberg_data/East Asia Lithium Carbonate 99.5% CIF CJK Financial Swap USD:MT (Fastmarkets) Singapore Exchange SIMEX.csv', sep=';')
df5 = df5.iloc[:, :2]
df5['Date'] = pd.to_datetime(df5['Date'], format='%m/%d/%y')
df5['Close Price'] = df5['Close Price'].str.replace(',', '.').astype(float)
df5 = df5.rename(columns={'Close Price': 'Price'})
df5 = df5.sort_values(by='Date')
df5.head()

Unnamed: 0,Date,Price
493,2023-05-02,37.75
492,2023-05-03,36.44
491,2023-05-04,36.48
490,2023-05-05,36.48
489,2023-05-08,36.48


In [32]:
# Load Lithium data source #6 - East Asia Lithium Carbonate Battery Grade CIF
df6 = pd.read_csv('/Users/michal/Documents/Code/metals/bloomberg_data/East Asia Lithium Carbonate China Korea Japan Battery Grade CIF USD:kg Future Singapore Exchange SIMEX.csv', sep=';')
df6 = df6.iloc[:, :2]
df6['Date'] = pd.to_datetime(df6['Date'], format='%m/%d/%y')
df6['Close Price'] = df6['Close Price'].str.replace(',', '.').astype(float)
df6 = df6.rename(columns={'Close Price': 'Price'})
df6 = df6.sort_values(by='Date')
df6.head()

Unnamed: 0,Date,Price
512,2023-04-03,43.7
511,2023-04-04,42.94
510,2023-04-05,42.92
509,2023-04-06,40.36
508,2023-04-10,38.67


In [6]:
# Merge all four dataframes on the 'Date' column
df_merged = pd.merge(df1, df2, on='Date', suffixes=('_Lithium', '_SouthAmericaLOB'))
df_merged = pd.merge(df_merged, df3[['Date', 'Price']], on='Date', how='inner')
df_merged = pd.merge(df_merged, df4[['Date', 'Price']], on='Date', how='inner', suffixes=('_Lithium_Hydroxide', '_Lithium_Americas'))

# Rename columns for clarity
df_merged = df_merged.rename(columns={'Price_Lithium_Hydroxide': 'Price_Lithium_Hydroxide',
                                      'Price_Lithium_Americas': 'Price_Lithium_Americas_Corp'})

# Convert all price columns to float
df_merged['Price_Lithium'] = df_merged['Price_Lithium'].astype(float)
df_merged['Price_SouthAmericaLOB'] = df_merged['Price_SouthAmericaLOB'].astype(float)
df_merged['Price_Lithium_Hydroxide'] = df_merged['Price_Lithium_Hydroxide'].astype(float)
df_merged['Price_Lithium_Americas_Corp'] = df_merged['Price_Lithium_Americas_Corp'].astype(float)

# Plot the data with updated legend
fig = px.line(df_merged, x='Date', y=['Price_Lithium', 'Price_SouthAmericaLOB', 'Price_Lithium_Hydroxide', 'Price_Lithium_Americas_Corp'], 
              labels={'value': 'Price (USD/kg)', 'variable': 'Price Source'},
              title='Lithium Prices from Multiple Sources')
fig.update_layout(
    yaxis=dict(title='Price'),
    legend_title_text='Source',
    legend=dict(
        itemsizing='constant',
        traceorder='normal',
        title_font=dict(size=12),
        font=dict(size=10)
    )
)
fig.show()

In [7]:
# Calculate the correlation matrix using the returns
df_merged['Returns_Lithium'] = df_merged['Price_Lithium'].pct_change()
df_merged['Returns_SouthAmericaLOB'] = df_merged['Price_SouthAmericaLOB'].pct_change()
df_merged['Returns_Lithium_Hydroxide'] = df_merged['Price_Lithium_Hydroxide'].pct_change()
df_merged['Returns_Lithium_Americas_Corp'] = df_merged['Price_Lithium_Americas_Corp'].pct_change()
# Drop NaN values
df_merged = df_merged.dropna(subset=['Returns_Lithium', 'Returns_SouthAmericaLOB', 'Returns_Lithium_Hydroxide', 'Returns_Lithium_Americas_Corp'])
# Calculate the correlation matrix
correlation_matrix = df_merged[['Returns_Lithium', 'Returns_SouthAmericaLOB', 'Returns_Lithium_Hydroxide', 'Returns_Lithium_Americas_Corp']].corr()
print("Correlation matrix:")
print(correlation_matrix)


Correlation matrix:
                               Returns_Lithium  Returns_SouthAmericaLOB  \
Returns_Lithium                       1.000000                 0.403262   
Returns_SouthAmericaLOB               0.403262                 1.000000   
Returns_Lithium_Hydroxide             0.316357                 0.223033   
Returns_Lithium_Americas_Corp         0.103941                -0.092861   

                               Returns_Lithium_Hydroxide  \
Returns_Lithium                                 0.316357   
Returns_SouthAmericaLOB                         0.223033   
Returns_Lithium_Hydroxide                       1.000000   
Returns_Lithium_Americas_Corp                   0.288364   

                               Returns_Lithium_Americas_Corp  
Returns_Lithium                                     0.103941  
Returns_SouthAmericaLOB                            -0.092861  
Returns_Lithium_Hydroxide                           0.288364  
Returns_Lithium_Americas_Corp                      

In [8]:
from colorama import Fore, Style

# Perform cointegration test between Price_Lithium and Price_SouthAmericaLOB
score, p_value, _ = coint(df_merged['Price_Lithium'], df_merged['Price_SouthAmericaLOB'])
print(f"{Fore.YELLOW}Cointegration test between {Fore.CYAN}Price_Lithium{Fore.YELLOW} and {Fore.CYAN}Price_SouthAmericaLOB:{Style.RESET_ALL}")
print(f"  - Test score: {Fore.GREEN}{score}{Style.RESET_ALL}")
print(f"  - P-value: {Fore.GREEN}{p_value}{Style.RESET_ALL} (A p-value < 0.05 suggests a significant cointegration relationship)")

# Perform cointegration test between Price_Lithium and Price_Lithium_Hydroxide
score, p_value, _ = coint(df_merged['Price_Lithium'], df_merged['Price_Lithium_Hydroxide'])
print(f"{Fore.YELLOW}Cointegration test between {Fore.CYAN}Price_Lithium{Fore.YELLOW} and {Fore.CYAN}Price_Lithium_Hydroxide:{Style.RESET_ALL}")
print(f"  - Test score: {Fore.GREEN}{score}{Style.RESET_ALL}")
print(f"  - P-value: {Fore.GREEN}{p_value}{Style.RESET_ALL} (A p-value < 0.05 suggests a significant cointegration relationship)")

# Perform cointegration test between Price_SouthAmericaLOB and Price_Lithium_Hydroxide
score, p_value, _ = coint(df_merged['Price_SouthAmericaLOB'], df_merged['Price_Lithium_Hydroxide'])
print(f"{Fore.YELLOW}Cointegration test between {Fore.CYAN}Price_SouthAmericaLOB{Fore.YELLOW} and {Fore.CYAN}Price_Lithium_Hydroxide:{Style.RESET_ALL}")
print(f"  - Test score: {Fore.GREEN}{score}{Style.RESET_ALL}")
print(f"  - P-value: {Fore.GREEN}{p_value}{Style.RESET_ALL} (A p-value < 0.05 suggests a significant cointegration relationship)")

# Perform cointegration test between Price_Lithium and Price_Lithium_Americas_Corp
score, p_value, _ = coint(df_merged['Price_Lithium'], df_merged['Price_Lithium_Americas_Corp'])
print(f"{Fore.YELLOW}Cointegration test between {Fore.CYAN}Price_Lithium{Fore.YELLOW} and {Fore.CYAN}Price_Lithium_Americas_Corp:{Style.RESET_ALL}")
print(f"  - Test score: {Fore.GREEN}{score}{Style.RESET_ALL}")
print(f"  - P-value: {Fore.GREEN}{p_value}{Style.RESET_ALL} (A p-value < 0.05 suggests a significant cointegration relationship)")

# Perform cointegration test between Price_SouthAmericaLOB and Price_Lithium_Americas_Corp
score, p_value, _ = coint(df_merged['Price_SouthAmericaLOB'], df_merged['Price_Lithium_Americas_Corp'])
print(f"{Fore.YELLOW}Cointegration test between {Fore.CYAN}Price_SouthAmericaLOB{Fore.YELLOW} and {Fore.CYAN}Price_Lithium_Americas_Corp:{Style.RESET_ALL}")
print(f"  - Test score: {Fore.GREEN}{score}{Style.RESET_ALL}")
print(f"  - P-value: {Fore.GREEN}{p_value}{Style.RESET_ALL} (A p-value < 0.05 suggests a significant cointegration relationship)")

# Perform cointegration test between Price_Lithium_Hydroxide and Price_Lithium_Americas_Corp
score, p_value, _ = coint(df_merged['Price_Lithium_Hydroxide'], df_merged['Price_Lithium_Americas_Corp'])
print(f"{Fore.YELLOW}Cointegration test between {Fore.CYAN}Price_Lithium_Hydroxide{Fore.YELLOW} and {Fore.CYAN}Price_Lithium_Americas_Corp:{Style.RESET_ALL}")
print(f"  - Test score: {Fore.GREEN}{score}{Style.RESET_ALL}")
print(f"  - P-value: {Fore.GREEN}{p_value}{Style.RESET_ALL} (A p-value < 0.05 suggests a significant cointegration relationship)")

NameError: name 'coint' is not defined

In [22]:
# Export the merged dataframe to a CSV file
# It should contain the Date and all three price columns
df_merged.to_csv('/Users/michal/Documents/Code/metals/data/ALL_lithium_prices.csv', index=False)

# Nickel

In [9]:
# Load Nickel data source #1 - Nickel prices per pound from Dailymetalprice
dfn1 = pd.read_csv('/Users/michal/Documents/Code/metals/data/Nickel_prices_2017-01-01_to_2024-12-31_merged.csv')
dfn1['Date'] = pd.to_datetime(dfn1['Date'])
dfn1['Price'] = dfn1['Price'].astype(float)
dfn1 = dfn1.drop(columns=['Unit'])
dfn1 = dfn1.drop_duplicates(subset=['Date'])
dfn1.head()

Unnamed: 0,Date,Price
0,2017-01-02,4.5196
2,2017-01-03,4.6266
4,2017-01-04,4.5223
6,2017-01-05,4.638
8,2017-01-06,4.6176


In [10]:
# Load Nickel data source #2 - Nickel HLOC from LME
dfn2 = pd.read_csv('/Users/michal/Documents/Code/metals/bloomberg_data/nickel_HLOC_2017_2024.csv', sep=';')
dfn2['Date'] = pd.to_datetime(dfn2['Date'], dayfirst=True)
dfn2 = dfn2.rename(columns={'PX_LAST': 'Price'})
dfn2['Price'] = dfn2['Price'].astype(float)
dfn2 = dfn2.drop(index=0)
dfn2 = dfn2.sort_values(by='Date')
dfn2 = dfn2.iloc[:, :2] 
dfn2['Price'] = dfn2['Price']/1000 # WARNING: DIVIDING BY 1000
dfn2.head()

Unnamed: 0,Date,Price
2014,2016-12-30,10.02
2013,2017-01-03,9.91
2012,2017-01-04,10.22
2011,2017-01-05,10.295
2010,2017-01-06,10.245


In [11]:
# Load Nickel data source #3 - Nickel Miners ETF
dfn3 = pd.read_csv('/Users/michal/Documents/Code/metals/reuters_data/Sprott Nickel Prices ETF.csv', sep=';')
dfn3 = dfn3.rename(columns={'Exchange Date': 'Date'})
dfn3 = dfn3.rename(columns={'Close' : 'Price'})
dfn3['Date'] = pd.to_datetime(dfn3['Date'], format='%d-%b-%Y')
dfn3['Price'] = dfn3['Price'].str.replace(',', '.').astype(float)
dfn3.head()

Unnamed: 0,Date,Price,Net,%Chg,Open,Low,High
0,2023-03-22,20.82,,,2111,2082,2115
1,2023-03-23,20.75,-7.0,"-0,36%",2102,2069,2114
2,2023-03-24,20.79,4.0,"+0,20%",2064,2064,2088
3,2023-03-27,20.8,1.0,"+0,04%",2096,2078,2096
4,2023-03-28,21.51,71.0,"+3,41%",2131,2131,2151


In [12]:
# Merge all three dataframes on the 'Date' column
dfn_merged = pd.merge(dfn1, dfn2, on='Date', suffixes=('_Nickel', '_LME'))
dfn_merged = pd.merge(dfn_merged, dfn3[['Date', 'Price']], on='Date', how='inner')
dfn_merged = dfn_merged.rename(columns={'Price': 'Price_ETF'})

# Convert all price columns to float
dfn_merged['Price_Nickel'] = dfn_merged['Price_Nickel'].astype(float)
dfn_merged['Price_LME'] = dfn_merged['Price_LME'].astype(float)
dfn_merged['Price_ETF'] = dfn_merged['Price_ETF'].astype(float)

# Plot the data with updated legend
fig_nickel = px.line(dfn_merged, x='Date', y=['Price_Nickel', 'Price_LME', 'Price_ETF'], 
                      labels={'value': 'Price (USD/lb)', 'variable': 'Price Source'},
                      title='Nickel Prices from Multiple Sources')
fig_nickel.update_layout(
    yaxis=dict(title='Price'),
    legend_title_text='Source',
    legend=dict(
        itemsizing='constant',
        traceorder='normal',
        title_font=dict(size=12),
        font=dict(size=10)
    )
)
fig_nickel.show()

In [13]:
# Calculate the correlation matrix using the returns
dfn_merged['Returns_Nickel'] = dfn_merged['Price_Nickel'].pct_change()
dfn_merged['Returns_LME'] = dfn_merged['Price_LME'].pct_change()
dfn_merged['Returns_ETF'] = dfn_merged['Price_ETF'].pct_change()
# Drop NaN values
dfn_merged = dfn_merged.dropna(subset=['Returns_Nickel', 'Returns_LME', 'Returns_ETF'])
# Calculate the correlation matrix
correlation_matrix_nickel = dfn_merged[['Returns_Nickel', 'Returns_LME', 'Returns_ETF']].corr()
print("Nickel Correlation matrix:")
print(correlation_matrix_nickel)

Nickel Correlation matrix:
                Returns_Nickel  Returns_LME  Returns_ETF
Returns_Nickel        1.000000     0.653469     0.146581
Returns_LME           0.653469     1.000000     0.231760
Returns_ETF           0.146581     0.231760     1.000000


In [14]:
# Compute the cointegration
from statsmodels.tsa.stattools import coint

# Perform cointegration test between Price_Nickel and Price_LME
score, p_value, _ = coint(dfn_merged['Price_Nickel'], dfn_merged['Price_LME'])
print(f"{Fore.YELLOW}Cointegration test between {Fore.CYAN}Price_Nickel{Fore.YELLOW} and {Fore.CYAN}Price_LME:{Style.RESET_ALL}")
print(f"  - Test score: {Fore.GREEN}{score}{Style.RESET_ALL}")
print(f"  - P-value: {Fore.GREEN}{p_value}{Style.RESET_ALL} (A p-value < 0.05 suggests a significant cointegration relationship)")

# Perform cointegration test between Price_Nickel and Price_ETF
score, p_value, _ = coint(dfn_merged['Price_Nickel'], dfn_merged['Price_ETF'])
print(f"{Fore.YELLOW}Cointegration test between {Fore.CYAN}Price_Nickel{Fore.YELLOW} and {Fore.CYAN}Price_ETF:{Style.RESET_ALL}")
print(f"  - Test score: {Fore.GREEN}{score}{Style.RESET_ALL}")
print(f"  - P-value: {Fore.GREEN}{p_value}{Style.RESET_ALL} (A p-value < 0.05 suggests a significant cointegration relationship)")

# Perform cointegration test between Price_LME and Price_ETF
score, p_value, _ = coint(dfn_merged['Price_LME'], dfn_merged['Price_ETF'])
print(f"{Fore.YELLOW}Cointegration test between {Fore.CYAN}Price_LME{Fore.YELLOW} and {Fore.CYAN}Price_ETF:{Style.RESET_ALL}")
print(f"  - Test score: {Fore.GREEN}{score}{Style.RESET_ALL}")
print(f"  - P-value: {Fore.GREEN}{p_value}{Style.RESET_ALL} (A p-value < 0.05 suggests a significant cointegration relationship)")


[33mCointegration test between [36mPrice_Nickel[33m and [36mPrice_LME:[0m
  - Test score: [32m-5.531267472561759[0m
  - P-value: [32m1.5500669894765108e-05[0m (A p-value < 0.05 suggests a significant cointegration relationship)
[33mCointegration test between [36mPrice_Nickel[33m and [36mPrice_ETF:[0m
  - Test score: [32m-3.2385600855766956[0m
  - P-value: [32m0.06371250531847378[0m (A p-value < 0.05 suggests a significant cointegration relationship)
[33mCointegration test between [36mPrice_LME[33m and [36mPrice_ETF:[0m
  - Test score: [32m-3.175281280386953[0m
  - P-value: [32m0.07415978200653804[0m (A p-value < 0.05 suggests a significant cointegration relationship)


# Cobalt

In [15]:
# Load Cobalt data source #1 - Cobalt prices per pound from Dailymetalprice
dfc1 = pd.read_csv('/Users/michal/Documents/Code/metals/data/Cobalt_prices_2017-01-01_to_2024-12-31_merged.csv')
dfc1['Date'] = pd.to_datetime(dfc1['Date'])
dfc1['Price'] = dfc1['Price'].astype(float)
dfc1 = dfc1.drop(columns=['Unit'])
dfc1 = dfc1.drop_duplicates(subset=['Date'])
dfc1 = dfc1.sort_values(by='Date')
dfc1.head()

Unnamed: 0,Date,Price
0,2017-01-02,14.855
2,2017-01-03,14.742
4,2017-01-04,14.742
6,2017-01-05,14.742
8,2017-01-06,14.742


In [16]:
# Load Cobalt data source #2 - Cobalt HLOC from LME
dfc2 = pd.read_csv('/Users/michal/Documents/Code/metals/bloomberg_data/cobalt_HLOC_2017_2024.csv', sep=';')
dfc2['Date'] = pd.to_datetime(dfc2['Date'], dayfirst=True)
dfc2 = dfc2.rename(columns={'PX_LAST': 'Price'})
# Replace commas with dots and convert the 'Price' column to float
dfc2['Price'] = dfc2['Price'].str.replace(',', '.').astype(float)
dfc2 = dfc2.drop(index=0)
dfc2 = dfc2.sort_values(by='Date')
dfc2 = dfc2.iloc[:, :2]
dfc2['Price'] = dfc2['Price']/1000 # WARNING: DIVIDING BY 1000
dfc2.head()

Unnamed: 0,Date,Price
2019,2016-12-30,32.734
2018,2017-01-03,32.735
2017,2017-01-04,32.736
2016,2017-01-05,32.7395
2015,2017-01-06,32.7405


In [17]:
# Load Cobalt data source #3 - LME 3 Month Cobalt Composite Forward
dfc3 = pd.read_csv('reuters_data/LME 3 Month Cobalt Composite Commodity Forward .csv', sep=';')
dfc3 = dfc3.iloc[:, :2]
dfc3 = dfc3.rename(columns={'Close' : 'Price'})
dfc3['Date'] = pd.to_datetime(dfc3['Date'], format='%d-%b-%Y')
# Clean the 'Price' column by removing non-breaking spaces and replacing commas with dots
dfc3['Price'] = dfc3['Price'].str.replace('\xa0', '').str.replace(',', '.').astype(float)
dfc3['Price'] = dfc3['Price']/1000 # WARNING: DIVIDING BY 1000
dfc3.head()

Unnamed: 0,Date,Price
0,2017-01-03,32.75
1,2017-01-04,32.75
2,2017-01-05,32.75
3,2017-01-06,32.75
4,2017-01-09,32.75


In [18]:
# Merge all three dataframes on the 'Date' column
dfc_merged = pd.merge(dfc1, dfc2, on='Date', suffixes=('_Cobalt', '_LME'))
dfc_merged = pd.merge(dfc_merged, dfc3[['Date', 'Price']], on='Date', how='inner')
dfc_merged = dfc_merged.rename(columns={'Price': 'Price_LME3Month'})
# Convert all price columns to float
dfc_merged['Price_Cobalt'] = dfc_merged['Price_Cobalt'].astype(float)
dfc_merged['Price_LME'] = dfc_merged['Price_LME'].astype(float)
dfc_merged['Price_LME3Month'] = dfc_merged['Price_LME3Month'].astype(float)
# Plot the data with updated legend
fig_cobalt = px.line(dfc_merged, x='Date', y=['Price_Cobalt', 'Price_LME', 'Price_LME3Month'], 
                      labels={'value': 'Price (USD/lb)', 'variable': 'Price Source'},
                      title='Cobalt Prices from Multiple Sources')
fig_cobalt.update_layout(
    yaxis=dict(title='Price'),
    legend_title_text='Source',
    legend=dict(
        itemsizing='constant',
        traceorder='normal',
        title_font=dict(size=12),
        font=dict(size=10)
    )
)
fig_cobalt.show()

In [19]:
# Calculate the correlation matrix using the returns
dfc_merged['Returns_Cobalt'] = dfc_merged['Price_Cobalt'].pct_change()
dfc_merged['Returns_LME'] = dfc_merged['Price_LME'].pct_change()
dfc_merged['Returns_LME3Month'] = dfc_merged['Price_LME3Month'].pct_change()
# Drop NaN values
dfc_merged = dfc_merged.dropna(subset=['Returns_Cobalt', 'Returns_LME', 'Returns_LME3Month'])
# Calculate the correlation matrix
correlation_matrix_cobalt = dfc_merged[['Returns_Cobalt', 'Returns_LME', 'Returns_LME3Month']].corr()
print("Cobalt Correlation matrix:")
print(correlation_matrix_cobalt)

Cobalt Correlation matrix:
                   Returns_Cobalt  Returns_LME  Returns_LME3Month
Returns_Cobalt           1.000000     0.609940           0.600684
Returns_LME              0.609940     1.000000           0.992943
Returns_LME3Month        0.600684     0.992943           1.000000


In [20]:
# Compute the cointegration
from statsmodels.tsa.stattools import coint
# Perform cointegration test between Price_Cobalt and Price_LME
score, p_value, _ = coint(dfc_merged['Price_Cobalt'], dfc_merged['Price_LME'])
print(f"Cointegration test between Price_Cobalt and Price_LME:")
print(f"  - Test score: {score}")
print(f"  - P-value: {p_value} (A p-value < 0.05 suggests a significant cointegration relationship)")
# Perform cointegration test between Price_Cobalt and Price_LME3Month
score, p_value, _ = coint(dfc_merged['Price_Cobalt'], dfc_merged['Price_LME3Month'])
print(f"Cointegration test between Price_Cobalt and Price_LME3Month:")
print(f"  - Test score: {score}")
print(f"  - P-value: {p_value} (A p-value < 0.05 suggests a significant cointegration relationship)")
# Perform cointegration test between Price_LME and Price_LME3Month
score, p_value, _ = coint(dfc_merged['Price_LME'], dfc_merged['Price_LME3Month'])
print(f"Cointegration test between Price_LME and Price_LME3Month:")
print(f"  - Test score: {score}")
print(f"  - P-value: {p_value} (A p-value < 0.05 suggests a significant cointegration relationship)")

Cointegration test between Price_Cobalt and Price_LME:
  - Test score: -5.056597911247561
  - P-value: 0.00012938844512649987 (A p-value < 0.05 suggests a significant cointegration relationship)
Cointegration test between Price_Cobalt and Price_LME3Month:
  - Test score: -5.620235459840751
  - P-value: 1.0218630724614348e-05 (A p-value < 0.05 suggests a significant cointegration relationship)
Cointegration test between Price_LME and Price_LME3Month:
  - Test score: -4.026539986609557
  - P-value: 0.006579658948068363 (A p-value < 0.05 suggests a significant cointegration relationship)


# Copper

In [21]:
# Load Copper data source #1 - Copper prices per pound from Dailymetalprice
dfcu1 = pd.read_csv('/Users/michal/Documents/Code/metals/data/Copper_prices_2017-01-01_to_2024-12-31_merged.csv')
dfcu1['Date'] = pd.to_datetime(dfcu1['Date'])
dfcu1['Price'] = dfcu1['Price'].astype(float)
dfcu1 = dfcu1.drop(columns=['Unit'])
dfcu1 = dfcu1.drop_duplicates(subset=['Date'])
dfcu1 = dfcu1.sort_values(by='Date')
dfcu1.head()

Unnamed: 0,Date,Price
0,2017-01-03,2.481
4,2017-01-04,2.548
8,2017-01-05,2.5295
12,2017-01-06,2.538
16,2017-01-09,2.53


In [22]:
# L