In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import coint
from colorama import Fore, Style
import warnings
warnings.filterwarnings('ignore')

## 1. Data Loading

Load full cubic spline datasets for all metals.

In [2]:
# Metals to analyse; each one is read from its own *_prices_cubic_spline.csv file.
metal_names = ['cobalt', 'copper', 'lithium', 'nickel']

# Section banner for the loading step
banner = "=" * 60
print(banner)
print("LOADING DATASETS")
print(banner + "\n")

# One DataFrame per metal, keyed by the capitalised metal name and indexed by Date.
metal_data = {}
for metal in metal_names:
    label = metal.capitalize()
    csv_path = f'data/ALL_{metal}_prices_cubic_spline.csv'
    df = pd.read_csv(csv_path, parse_dates=['Date'], index_col='Date')

    # LISAME is left out of the analysis whenever a file contains it.
    has_lisame = 'LISAME' in df.columns
    if has_lisame:
        df = df.drop(columns=['LISAME'])
        print(f"{label}: LISAME excluded")

    metal_data[label] = df

    # Short per-metal summary: size and the series that were kept.
    n_rows, n_series = df.shape
    print(f"{label}: {n_rows} rows, {n_series} series")
    print(f"  Columns: {df.columns.tolist()}")
    print()

LOADING DATASETS

Cobalt: 3876 rows, 6 series
  Columns: ['CODALY', 'COLMEX', 'COLMEA', 'COWUXI', 'COCOMX', 'COSMMS']

Copper: 2342 rows, 6 series
  Columns: ['CUDALY', 'CUCOMX', 'CULMEX', 'CUSMMG', 'CUSHFE', 'CUETFC']

Lithium: LISAME excluded
Lithium: 2202 rows, 6 series
  Columns: ['LIDALY', 'LICOMX', 'LILAMC', 'LIEALC', 'LIEABG', 'LILMEX']

Nickel: 2857 rows, 6 series
  Columns: ['NIDALY', 'NILMEX', 'NIETFN', 'NISHFE', 'NIWUXI', 'NIINDA']



## 2. Compute Log-Prices

Convert prices to log-prices for cointegration testing.

In [3]:
print("="*60)
print("COMPUTING LOG-PRICES")
print("="*60 + "\n")

# NOTE: despite its name, `metal_returns` holds log-price LEVELS (not returns).
# The name is kept because the analysis cells below read from it.
metal_returns = {}
for metal in metal_names:
    df = metal_data[metal.capitalize()].copy()

    # log() is undefined for zero/negative values: np.log would silently yield
    # -inf / NaN (warnings are suppressed in this notebook), and -inf is NOT
    # removed by a later dropna(). Mark such observations as missing instead.
    # (Whether the interpolated input files actually contain such values is
    # not verified here; the count is reported when they do.)
    non_positive = df <= 0
    n_non_positive = int(non_positive.to_numpy().sum())
    if n_non_positive:
        print(f"{metal.capitalize()}: {n_non_positive} non-positive price(s) set to NaN before taking logs")

    # Compute log-prices
    df_log_prices = np.log(df.mask(non_positive))
    metal_returns[metal.capitalize()] = df_log_prices
    print(f"{metal.capitalize()} log-prices: {df_log_prices.shape[0]} rows, {df_log_prices.shape[1]} series")

print("\nLog-prices will be used for cointegration analysis.")

COMPUTING LOG-PRICES

Cobalt log-prices: 3876 rows, 6 series
Copper log-prices: 2342 rows, 6 series
Lithium log-prices: 2202 rows, 6 series
Nickel log-prices: 2857 rows, 6 series

Log-prices will be used for cointegration analysis.


## 3. Configuration

Define series pairs and ticker mappings for each metal.

In [4]:
# Ticker mappings for LaTeX output (maps column names to display names).
# Every column that is actually loaded for a metal has an entry here; the
# additional legacy tickers are kept so existing lookups keep working.
# All entries are currently identity mappings (display name == column name).
ticker_mappings = {
    'Cobalt': {
        'CODALY': 'CODALY',
        'COLMEX': 'COLMEX',
        'COLME3': 'COLME3',
        'COLMEA': 'COLMEA',
        'COWUXI': 'COWUXI',
        'COCOMX': 'COCOMX',
        'COSMMS': 'COSMMS'
    },
    'Copper': {
        'CUDALY': 'CUDALY',
        'CULMEX': 'CULMEX',
        'CULME3': 'CULME3',
        'CULMEA': 'CULMEA',
        'CUWUXI': 'CUWUXI',
        'CUCOMX': 'CUCOMX',
        'CUSMMS': 'CUSMMS',
        # Columns present in the loaded copper dataset
        'CUSMMG': 'CUSMMG',
        'CUSHFE': 'CUSHFE',
        'CUETFC': 'CUETFC'
    },
    'Lithium': {
        'LIDALY': 'LIDALY',
        'LILMEX': 'LILMEX',
        'LILME3': 'LILME3',
        'LILMEA': 'LILMEA',
        'LIWUXI': 'LIWUXI',
        'LICOMX': 'LICOMX',
        'LISMMS': 'LISMMS',
        # Columns present in the loaded lithium dataset
        'LILAMC': 'LILAMC',
        'LIEALC': 'LIEALC',
        'LIEABG': 'LIEABG'
    },
    'Nickel': {
        'NIDALY': 'NIDALY',
        'NILMEX': 'NILMEX',
        'NILME3': 'NILME3',
        'NILMEA': 'NILMEA',
        'NIWUXI': 'NIWUXI',
        'NICOMX': 'NICOMX',
        'NISMMS': 'NISMMS',
        # Columns present in the loaded nickel dataset
        'NIETFN': 'NIETFN',
        'NISHFE': 'NISHFE',
        'NIINDA': 'NIINDA'
    }
}

# Series pairs are built dynamically from each metal's actual column names.
def get_series_pairs(columns, exclude=('LISAME',)):
    """Generate all unique pairwise combinations of series.

    Parameters:
    - columns: Iterable of series (column) names, e.g. ``df.columns``
    - exclude: Name or iterable of names to leave out before pairing.
      Defaults to ``('LISAME',)``, matching the previous hard-coded behaviour.
      Pass ``None`` or an empty tuple to keep every column.

    Returns:
    - List of (series1, series2) tuples in input order; each unordered pair
      appears exactly once, with the earlier column first. Fewer than two
      remaining columns yields an empty list.
    """
    from itertools import combinations

    # A bare string would otherwise be treated as a collection of characters.
    if isinstance(exclude, str):
        exclude = (exclude,)
    excluded = set(exclude or ())

    cols = [c for c in columns if c not in excluded]
    return list(combinations(cols, 2))

## 4. Helper Functions

In [5]:
def run_cointegration_tests(df, pairs, metal_name):
    """
    Run Engle-Granger cointegration tests for specified pairs.

    All pairs are tested on one common sample: only rows where every series
    that appears in `pairs` has a finite value are kept. Infinite values
    (e.g. -inf from the log of a zero price) are treated as missing, because
    dropna() on its own would keep them.

    Parameters:
    - df: DataFrame with series as columns
    - pairs: List of (series1, series2) tuples
    - metal_name: Name of the metal for reporting

    Returns:
    - List of (series1, series2, score, p_value) tuples, one per input pair
      and in the same order. If a test raises, the error is printed and the
      pair is recorded with score and p_value set to NaN.
    """
    # Unique series names in first-seen order (dict keys keep insertion order),
    # so the column order of the working frame is deterministic.
    all_series = list(dict.fromkeys(name for pair in pairs for name in pair))

    # Drop rows with NaN or +/-inf in any of the required series
    df_clean = df[all_series].replace([np.inf, -np.inf], np.nan).dropna()

    print(f"\n{Fore.YELLOW}{metal_name} - Testing {len(pairs)} pairs{Style.RESET_ALL}")
    print(f"Data range: {df_clean.index.min()} to {df_clean.index.max()}")
    print(f"Observations: {len(df_clean)}\n")

    results = []
    for series1, series2 in pairs:
        try:
            # coint returns (test statistic, p-value, critical values)
            score, p_value, _ = coint(df_clean[series1], df_clean[series2])
            results.append((series1, series2, score, p_value))
        except Exception as e:
            # Best effort: one failing pair must not abort the remaining tests.
            print(f"Error testing {series1} vs {series2}: {e}")
            results.append((series1, series2, np.nan, np.nan))

    return results


def print_results_table(results, metal_name, alpha=0.05):
    """Print cointegration results as a formatted console table.

    Parameters:
    - results: Iterable of (series1, series2, score, p_value) tuples
    - metal_name: Name of the metal, used in the table title
    - alpha: Significance threshold; rows with p_value < alpha have their
      score and p-value printed in green (default 0.05)

    Rows whose p_value is missing (NaN/None) are printed with 'N/A' in both
    numeric columns. Rows that are not significant are printed without the
    green highlight, so green marks significant results only.
    """
    print(f"\n{Fore.YELLOW}{metal_name} Cointegration Test Results:{Style.RESET_ALL}")
    print(f"{Fore.CYAN}{'Series 1':<15}{'Series 2':<15}{'Score':<15}{'P-Value':<15}{Style.RESET_ALL}")

    for series1, series2, score, p_value in results:
        if pd.notna(p_value):
            # One colour for the whole numeric part of the row: green only
            # when the test is significant at the chosen level.
            row_color = Fore.GREEN if p_value < alpha else Style.RESET_ALL
            print(f"{series1:<15}{series2:<15}{row_color}{score:<15.4f}{p_value:<15.4f}{Style.RESET_ALL}")
        else:
            print(f"{series1:<15}{series2:<15}{'N/A':<15}{'N/A':<15}")


def print_latex_table(results, metal_name, ticker_map, alpha=0.05):
    """Generate LaTeX table for cointegration results.

    Parameters:
    - results: Iterable of (series1, series2, score, p_value) tuples
    - metal_name: Name of the metal, used in the caption and the label
    - ticker_map: Dict mapping column names to display names; names without
      an entry fall back to the column name with underscores escaped.
      ``None`` is treated as an empty mapping.
    - alpha: Significance threshold; p-values below it are wrapped in
      \\textcolor{green}{...} (default 0.05)

    Pairs whose test failed (missing p-value) are written as 'N/A' rows, in
    line with the console table, instead of being left out of the output.
    The emitted markup uses \\toprule/\\midrule/\\bottomrule and \\textcolor,
    so the including document must provide those commands.
    """
    ticker_map = ticker_map or {}

    print(f"\n{Fore.CYAN}LaTeX Table for {metal_name}:{Style.RESET_ALL}\n")

    print("\\begin{table}[ht]")
    print("\\centering")
    print("{\\small")
    print("\\begin{tabular}{llrr}")
    print("\\toprule")
    print("Series 1 & Series 2 & Score & P-Value \\\\")
    print("\\midrule")

    for series1, series2, score, p_value in results:
        # Get ticker symbols from mapping (escape underscores in the fallback)
        ticker1 = ticker_map.get(series1, series1.replace("_", "\\_"))
        ticker2 = ticker_map.get(series2, series2.replace("_", "\\_"))

        if pd.notna(p_value):
            # Format p-value with green color if significant
            p_value_str = f"\\textcolor{{green}}{{{p_value:.4f}}}" if p_value < alpha else f"{p_value:.4f}"
            score_str = f"{score:.4f}"
        else:
            # Failed test: keep the row visible rather than dropping it
            p_value_str = "N/A"
            score_str = f"{score:.4f}" if pd.notna(score) else "N/A"

        print(f"{ticker1} & {ticker2} & {score_str} & {p_value_str} \\\\")

    print("\\bottomrule")
    print("\\end{tabular}")
    print("}")
    print(f"\\caption{{Cointegration Test Results - {metal_name}}}")
    print(f"\\label{{tab:cointegration_{metal_name.lower()}}}")
    print("\\end{table}")
    print()

## 5. Cobalt Cointegration Analysis (Log Prices)

In [6]:
# Cobalt: Engle-Granger tests on every unique pair of log-price series.
metal = 'Cobalt'
df = metal_returns[metal]
pairs = get_series_pairs(df.columns)

# Section banner
banner = '=' * 60
print(banner, f"{metal.upper()} - COINTEGRATION ANALYSIS", banner, sep="\n")

# Test all pairs, then report them as a console table and as LaTeX.
results = run_cointegration_tests(df, pairs, metal)
print_results_table(results, metal)
print_latex_table(results, metal, ticker_mappings[metal])

COBALT - COINTEGRATION ANALYSIS

[33mCobalt - Testing 15 pairs[0m
Data range: 2023-05-04 00:00:00 to 2025-07-21 00:00:00
Observations: 662


[33mCobalt Cointegration Test Results:[0m
[36mSeries 1       Series 2       Score          P-Value        [0m
CODALY         COLMEX         [32m-4.8000        [32m0.0004         [0m
CODALY         COLMEA         [32m-1.9932        [0m0.5321         [0m
CODALY         COWUXI         [32m-4.2143        [32m0.0035         [0m
CODALY         COCOMX         [32m-5.4731        [32m0.0000         [0m
CODALY         COSMMS         [32m-2.7274        [0m0.1899         [0m
COLMEX         COLMEA         [32m-2.1705        [0m0.4396         [0m
COLMEX         COWUXI         [32m-4.5162        [32m0.0012         [0m
COLMEX         COCOMX         [32m-5.2229        [32m0.0001         [0m
COLMEX         COSMMS         [32m-2.9268        [0m0.1286         [0m
COLMEA         COWUXI         [32m0.0104         [0m0.9861         [0

## 6. Copper Cointegration Analysis (Log Prices)

In [7]:
# Copper: Engle-Granger tests on every unique pair of log-price series.
metal = 'Copper'
df = metal_returns[metal]
pairs = get_series_pairs(df.columns)

# Section banner
banner = '=' * 60
print(banner, f"{metal.upper()} - COINTEGRATION ANALYSIS", banner, sep="\n")

# Test all pairs, then report them as a console table and as LaTeX.
results = run_cointegration_tests(df, pairs, metal)
print_results_table(results, metal)
print_latex_table(results, metal, ticker_mappings[metal])

COPPER - COINTEGRATION ANALYSIS

[33mCopper - Testing 15 pairs[0m
Data range: 2024-03-06 00:00:00 to 2025-10-31 00:00:00
Observations: 432


[33mCopper Cointegration Test Results:[0m
[36mSeries 1       Series 2       Score          P-Value        [0m
CUDALY         CUCOMX         [32m-8.5209        [32m0.0000         [0m
CUDALY         CULMEX         [32m-2.8382        [0m0.1538         [0m
CUDALY         CUSMMG         [32m-3.3114        [0m0.0532         [0m
CUDALY         CUSHFE         [32m-3.2345        [0m0.0643         [0m
CUDALY         CUETFC         [32m-2.5930        [0m0.2397         [0m
CUCOMX         CULMEX         [32m-1.8401        [0m0.6101         [0m
CUCOMX         CUSMMG         [32m-3.4643        [32m0.0357         [0m
CUCOMX         CUSHFE         [32m-3.3865        [32m0.0439         [0m
CUCOMX         CUETFC         [32m-2.6937        [0m0.2019         [0m
CULMEX         CUSMMG         [32m-3.1840        [0m0.0726         [0m


## 7. Lithium Cointegration Analysis (Log Prices)

In [8]:
# Lithium: Engle-Granger tests on every unique pair of log-price series.
metal = 'Lithium'
df = metal_returns[metal]
pairs = get_series_pairs(df.columns)

# Section banner
banner = '=' * 60
print(banner, f"{metal.upper()} - COINTEGRATION ANALYSIS", banner, sep="\n")

# Test all pairs, then report them as a console table and as LaTeX.
results = run_cointegration_tests(df, pairs, metal)
print_results_table(results, metal)
print_latex_table(results, metal, ticker_mappings[metal])

LITHIUM - COINTEGRATION ANALYSIS

[33mLithium - Testing 15 pairs[0m
Data range: 2023-10-02 00:00:00 to 2025-10-31 00:00:00
Observations: 556


[33mLithium Cointegration Test Results:[0m
[36mSeries 1       Series 2       Score          P-Value        [0m
LIDALY         LICOMX         [32m-2.8186        [0m0.1599         [0m
LIDALY         LILAMC         [32m-3.2928        [0m0.0557         [0m
LIDALY         LIEALC         [32m-1.9592        [0m0.5497         [0m
LIDALY         LIEABG         [32m-1.9779        [0m0.5400         [0m
LIDALY         LILMEX         [32m-2.7093        [0m0.1962         [0m
LICOMX         LILAMC         [32m-2.2095        [0m0.4195         [0m
LICOMX         LIEALC         [32m-2.0793        [0m0.4872         [0m
LICOMX         LIEABG         [32m-2.4302        [0m0.3107         [0m
LICOMX         LILMEX         [32m-6.2300        [32m0.0000         [0m
LILAMC         LIEALC         [32m-2.7879        [0m0.1696         [0m

## 8. Nickel Cointegration Analysis (Log Prices)

In [9]:
# Nickel: Engle-Granger tests on every unique pair of log-price series.
metal = 'Nickel'
df = metal_returns[metal]
pairs = get_series_pairs(df.columns)

# Section banner
banner = '=' * 60
print(banner, f"{metal.upper()} - COINTEGRATION ANALYSIS", banner, sep="\n")

# Test all pairs, then report them as a console table and as LaTeX.
results = run_cointegration_tests(df, pairs, metal)
print_results_table(results, metal)
print_latex_table(results, metal, ticker_mappings[metal])

NICKEL - COINTEGRATION ANALYSIS

[33mNickel - Testing 15 pairs[0m
Data range: 2023-03-22 00:00:00 to 2025-07-21 00:00:00
Observations: 698


[33mNickel Cointegration Test Results:[0m
[36mSeries 1       Series 2       Score          P-Value        [0m
NIDALY         NILMEX         [32m-7.9676        [32m0.0000         [0m
NIDALY         NIETFN         [32m-3.2278        [0m0.0654         [0m
NIDALY         NISHFE         [32m-2.7166        [0m0.1937         [0m
NIDALY         NIWUXI         [32m-2.5521        [0m0.2566         [0m
NIDALY         NIINDA         [32m-4.0748        [32m0.0056         [0m
NILMEX         NIETFN         [32m-3.0892        [0m0.0905         [0m
NILMEX         NISHFE         [32m-3.1959        [0m0.0706         [0m
NILMEX         NIWUXI         [32m-3.1225        [0m0.0839         [0m
NILMEX         NIINDA         [32m-4.9144        [32m0.0002         [0m
NIETFN         NISHFE         [32m-2.1696        [0m0.4401         [0m


## 9. Summary

**Methodology:**
- Engle-Granger cointegration test performed on log-prices
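- Tests all unique pairwise combinations within each metal, using only the dates on which every series of that metal is observed (rows with any missing value are dropped before testing)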
- LISAME series excluded from analysis
- Log transformation applied to price levels: log(P_t)

**Interpretation:**
- **P-value < 0.05** (highlighted in green): the null hypothesis of no cointegration is rejected at the 5% level, i.e. statistically significant evidence of cointegration
- Lower p-values indicate stronger evidence of cointegration
- Cointegrated series tend to move together in the long run despite short-term deviations
- Testing on log-prices is appropriate for cointegration analysis as it stabilizes variance and handles exponential growth

**Note:** Cointegration is tested within each metal only, not across different metals.