# 🎓 Lotka-Volterra Competition Model: Gold vs Bitcoin**Bachelor Thesis - Frankfurt School**Kevin Brot & Ivan Miraglia | November 2025---## 📋 Research Question**Do Gold and Bitcoin compete for investor capital?**Using Lotka-Volterra competition model from ecology adapted to finance.---## Notebook Overview| Step | Task | Output ||------|------|--------|| 0 | Import libraries | Environment ready || 1 | Load data | 3 datasets || 2 | Interpolate & align | Time series || 3 | Normalize | Scaled data || 4 | Select alpha | Optimal smoothing || 5 | Train LV model | Coefficients || 6 | Significance tests | P-values || 7 | Predictions | Forecasts || 8 | Baselines | Comparison models || 9 | Evaluate | Final results |

## 📦 Step 0: Import Libraries

In [102]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import json
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.linear_model import Ridge
import warnings
warnings.filterwarnings('ignore')

# Set plot style
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 10

print("="*80)
print(" IMPROVED LOTKA-VOLTERRA MODEL - ADDRESSING CRITICAL ISSUES")
print("="*80)

 IMPROVED LOTKA-VOLTERRA MODEL - ADDRESSING CRITICAL ISSUES


## ⚙️ Configuration

## 📂 Step 1: Load Data

## 🔄 Step 2: Interpolate & Align

### 📊 Market Caps Visualization

In [None]:
fig, ax = plt.subplots(2,1, figsize=(14,10))
ax[0].plot(df.index, df['Gold_USD']/1e12, 'gold', lw=2)
ax[0].set_title('Gold Market Cap', fontsize=14, weight='bold')
ax[0].set_ylabel('Trillion USD')
ax[0].grid(True, alpha=0.3)
ax[1].plot(df.index, df['BTC_USD']/1e9, 'orange', lw=2)
ax[1].set_title('Bitcoin Market Cap', fontsize=14, weight='bold')
ax[1].set_ylabel('Billion USD')
ax[1].grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig(OUTDIR/'caps.png', dpi=300)
plt.show()
print(f"Gold: ${df['Gold_USD'].iloc[-1]/1e12:.2f}T | BTC: ${df['BTC_USD'].iloc[-1]/1e9:.2f}B")

## 🔢 Step 3: Normalize

In [106]:
print("\n" + "="*80)
print(" STEP 3: NORMALIZING DATA")
print("="*80)

gold_median = df["Gold_USD"].median()
btc_median = df["BTC_USD"].median()

df["Gold_Scaled"] = df["Gold_USD"] / gold_median
df["BTC_Scaled"] = df["BTC_USD"] / btc_median

print(f"\nGold median: ${gold_median/1e9:.2f}B")
print(f"BTC median:  ${btc_median/1e9:.2f}B")


 STEP 3: NORMALIZING DATA

Gold median: $0.00B
BTC median:  $129.97B


## 🛠️ Helpers

In [107]:
def exp_smooth(x, alpha=0.3):
    """Simple exponential smoothing"""
    Z = np.zeros_like(x)
    Z[0] = x[0]
    for t in range(1, len(x)):
        Z[t] = alpha * x[t] + (1 - alpha) * Z[t-1]
    return Z

def predict_lv_ridge(Z_current, Z_other, model):
    """Predict using Ridge regression model"""
    X = np.column_stack([Z_current, Z_current * Z_other])
    return model.predict(X)

## 🎯 Step 4: Alpha Selection

In [109]:
# Test different alpha values using VALIDATION set
alphas_to_test = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
cv_results = []

print("\nCross-validating alpha values on VALIDATION set:")
print("-" * 80)

for alpha_test in alphas_to_test:
    # Smooth with this alpha
    Z_gold = exp_smooth(df["Gold_Scaled"].values, alpha=alpha_test)
    Z_btc = exp_smooth(df["BTC_Scaled"].values, alpha=alpha_test)
    
    # Train on training set with Ridge regression
    Z_gold_train = Z_gold[train_idx][:-1]
    Z_btc_train = Z_btc[train_idx][:-1]
    gold_next_train = df["Gold_Scaled"].values[np.array(train_idx)[1:]]
    
    X_gold_train = np.column_stack([Z_gold_train, Z_gold_train * Z_btc_train])
    
    # Use Ridge regression to reduce overfitting
    ridge_model = Ridge(alpha=RIDGE_ALPHA, fit_intercept=False)
    ridge_model.fit(X_gold_train, gold_next_train)
    
    # Validate on validation set - FIX: proper indexing
    val_preds = []
    val_actuals = []
    
    for i in val_idx:
        if i == 0 or i-1 < 0:
            continue
        pred = ridge_model.predict([[Z_gold[i-1], Z_gold[i-1] * Z_btc[i-1]]])[0]
        actual = df["Gold_Scaled"].values[i]
        val_preds.append(pred)
        val_actuals.append(actual)
    
    val_rmse = np.sqrt(mean_squared_error(val_actuals, val_preds))
    val_mae = mean_absolute_error(val_actuals, val_preds)
    
    cv_results.append({
        'alpha': alpha_test,
        'val_rmse': val_rmse,
        'val_mae': val_mae,
        'model': ridge_model,
        'Z_gold': Z_gold,
        'Z_btc': Z_btc
    })
    
    print(f"  α={alpha_test:.1f}: Val RMSE={val_rmse:.4f}, Val MAE={val_mae:.4f}")


Cross-validating alpha values on VALIDATION set:
--------------------------------------------------------------------------------
  α=0.1: Val RMSE=0.1459, Val MAE=0.1243
  α=0.2: Val RMSE=0.1088, Val MAE=0.0910
  α=0.3: Val RMSE=0.0908, Val MAE=0.0738
  α=0.4: Val RMSE=0.0801, Val MAE=0.0634
  α=0.5: Val RMSE=0.0732, Val MAE=0.0573
  α=0.6: Val RMSE=0.0684, Val MAE=0.0530
  α=0.7: Val RMSE=0.0650, Val MAE=0.0498
  α=0.8: Val RMSE=0.0626, Val MAE=0.0475


## 🏋️ Step 5: Train Model

In [111]:
print("\n" + "="*80)
print(" STEP 5: TRAINING FINAL MODEL WITH REGULARIZATION (FIX #2)")
print("="*80)

# Train on TRAIN set only
Z_gold_train = Z_gold_final[train_idx][:-1]
Z_btc_train = Z_btc_final[train_idx][:-1]
gold_next_train = df["Gold_Scaled"].values[np.array(train_idx)[1:]]
btc_next_train = df["BTC_Scaled"].values[np.array(train_idx)[1:]]

print(f"\nTraining on {len(Z_gold_train)} samples")
print(f"Using Ridge regression with alpha={RIDGE_ALPHA}")

# Gold model
X_gold_train = np.column_stack([Z_gold_train, Z_gold_train * Z_btc_train])
gold_model = Ridge(alpha=RIDGE_ALPHA, fit_intercept=False)
gold_model.fit(X_gold_train, gold_next_train)
a_gold, C_gold_btc = gold_model.coef_

print(f"\n--- GOLD PARAMETERS ---")
print(f"  a (growth):        {a_gold:.4f}")
print(f"  C (BTC effect):    {C_gold_btc:.4f}")
if C_gold_btc > 0.01:
    print(f"  → Competition: BTC crowds out Gold")
elif C_gold_btc < -0.01:
    print(f"  → Complementarity: Assets move together")
else:
    print(f"  → Weak interaction")

# BTC model
X_btc_train = np.column_stack([Z_btc_train, Z_btc_train * Z_gold_train])
btc_model = Ridge(alpha=RIDGE_ALPHA, fit_intercept=False)
btc_model.fit(X_btc_train, btc_next_train)
a_btc, C_btc_gold = btc_model.coef_

print(f"\n--- BTC PARAMETERS ---")
print(f"  a (growth):        {a_btc:.4f}")
print(f"  C (Gold effect):   {C_btc_gold:.4f}")


 STEP 5: TRAINING FINAL MODEL WITH REGULARIZATION (FIX #2)

Training on 218 samples
Using Ridge regression with alpha=0.02

--- GOLD PARAMETERS ---
  a (growth):        1.0012
  C (BTC effect):    0.0053
  → Weak interaction

--- BTC PARAMETERS ---
  a (growth):        1.0301
  C (Gold effect):   -0.0289


## 📈 Step 6: Significance

In [122]:
import statsmodels.api as sm

print("\n" + "="*80)
print(" STATISTICAL SIGNIFICANCE TEST (p-values)")
print("="*80)

# Re-estimate using OLS to get p-values
print("\n[Gold Model - OLS for Statistical Inference]")
X_gold_ols = np.column_stack([Z_gold_train, Z_gold_train * Z_btc_train])
ols_gold = sm.OLS(gold_next_train, X_gold_ols).fit()

print("\nCoefficients:")
print(f"  a (growth):     {ols_gold.params[0]:.4f}  (p-value: {ols_gold.pvalues[0]:.4f})")
print(f"  C (BTC effect): {ols_gold.params[1]:.4f}  (p-value: {ols_gold.pvalues[1]:.4f})")

# Check significance
if ols_gold.pvalues[0] < 0.05:
    print(f"  ✓ Growth coefficient is statistically significant (p < 0.05)")
else:
    print(f"  ⚠ Growth coefficient is NOT significant (p = {ols_gold.pvalues[0]:.4f})")

if ols_gold.pvalues[1] < 0.05:
    print(f"  ✓ Interaction coefficient is statistically significant (p < 0.05)")
    print(f"    → Finding of complementarity/competition is ROBUST")
else:
    print(f"  ⚠ Interaction coefficient is NOT significant (p = {ols_gold.pvalues[1]:.4f})")
    print(f"    → Cannot conclude statistically significant interaction")

print(f"\nR-squared (in-sample): {ols_gold.rsquared:.4f}")

# Same for BTC
print("\n[BTC Model - OLS for Statistical Inference]")
X_btc_ols = np.column_stack([Z_btc_train, Z_btc_train * Z_gold_train])
ols_btc = sm.OLS(btc_next_train, X_btc_ols).fit()

print("\nCoefficients:")
print(f"  a (growth):      {ols_btc.params[0]:.4f}  (p-value: {ols_btc.pvalues[0]:.4f})")
print(f"  C (Gold effect): {ols_btc.params[1]:.4f}  (p-value: {ols_btc.pvalues[1]:.4f})")

if ols_btc.pvalues[1] < 0.05:
    print(f"  ✓ Interaction coefficient is statistically significant")
else:
    print(f"  ⚠ Interaction coefficient is NOT significant (p = {ols_btc.pvalues[1]:.4f})")


 STATISTICAL SIGNIFICANCE TEST (p-values)

[Gold Model - OLS for Statistical Inference]

Coefficients:
  a (growth):     1.0015  (p-value: 0.0000)
  C (BTC effect): 0.0051  (p-value: 0.0958)
  ✓ Growth coefficient is statistically significant (p < 0.05)
  ⚠ Interaction coefficient is NOT significant (p = 0.0958)
    → Cannot conclude statistically significant interaction

R-squared (in-sample): 0.9994

[BTC Model - OLS for Statistical Inference]

Coefficients:
  a (growth):      1.0355  (p-value: 0.0000)
  C (Gold effect): -0.0339  (p-value: 0.5242)
  ⚠ Interaction coefficient is NOT significant (p = 0.5242)


## 🔮 Step 7: Predict

In [112]:
print("\n" + "="*80)
print(" STEP 6: GENERATING PREDICTIONS")
print("="*80)

df["Gold_Pred"] = np.nan
df["BTC_Pred"] = np.nan

for i in range(len(df) - 1):
    X_gold = [[Z_gold_final[i], Z_gold_final[i] * Z_btc_final[i]]]
    X_btc = [[Z_btc_final[i], Z_btc_final[i] * Z_gold_final[i]]]
    
    df.iloc[i+1, df.columns.get_loc("Gold_Pred")] = gold_model.predict(X_gold)[0]
    df.iloc[i+1, df.columns.get_loc("BTC_Pred")] = btc_model.predict(X_btc)[0]

print(f"✓ Predictions generated")


 STEP 6: GENERATING PREDICTIONS
✓ Predictions generated


## 📊 Step 8: Baselines

In [113]:
print("\n" + "="*80)
print(" STEP 7: BASELINE MODEL COMPARISONS (FIX #3)")
print("="*80)

# Baseline 1: Persistence (naive forecast)
df["Gold_Baseline_Persistence"] = df["Gold_Scaled"].shift(1)
df["BTC_Baseline_Persistence"] = df["BTC_Scaled"].shift(1)

# Baseline 2: Moving Average (4-week)
df["Gold_Baseline_MA"] = df["Gold_Scaled"].rolling(window=4, min_periods=1).mean().shift(1)
df["BTC_Baseline_MA"] = df["BTC_Scaled"].rolling(window=4, min_periods=1).mean().shift(1)

# Baseline 3: Exponential Moving Average
ema_alpha = 0.3
df["Gold_Baseline_EMA"] = df["Gold_Scaled"].ewm(alpha=ema_alpha, adjust=False).mean().shift(1)
df["BTC_Baseline_EMA"] = df["BTC_Scaled"].ewm(alpha=ema_alpha, adjust=False).mean().shift(1)

print("✓ Baseline models created:")
print("  1. Persistence (x_t+1 = x_t)")
print("  2. Moving Average (4-week)")
print("  3. Exponential Moving Average")


 STEP 7: BASELINE MODEL COMPARISONS (FIX #3)
✓ Baseline models created:
  1. Persistence (x_t+1 = x_t)
  2. Moving Average (4-week)
  3. Exponential Moving Average


## 📏 Step 9: Evaluate

In [114]:
def calc_metrics(actual, pred, mask=None):
    """Calculate RMSE, MAE, MAPE"""
    if mask is None:
        mask = ~(np.isnan(actual) | np.isnan(pred))
    actual_clean = actual[mask]
    pred_clean = pred[mask]
    
    if len(actual_clean) == 0:
        return {"RMSE": np.nan, "MAE": np.nan, "MAPE": np.nan}
    
    rmse = np.sqrt(mean_squared_error(actual_clean, pred_clean))
    mae = mean_absolute_error(actual_clean, pred_clean)
    mape = np.mean(np.abs((actual_clean - pred_clean) / np.abs(actual_clean))) * 100
    
    return {"RMSE": rmse, "MAE": mae, "MAPE": mape}

## ✅ SummaryCheck your results:- **P-values < 0.05?** → Significant interaction- **LV beats persistence?** → Forecasting value- **C > 0:** Competition | **C < 0:** Complementarity