# cMLP; F = 40; T = 1000; Layers = 5


In [None]:
# Mount Google Drive inside the Colab VM; results further down are written
# under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!git clone https://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@github.com/Proton1121/ngcausality.git

Cloning into 'ngcausality'...
remote: Enumerating objects: 220, done.[K
remote: Counting objects: 100% (35/35), done.[K
remote: Compressing objects: 100% (27/27), done.[K
remote: Total 220 (delta 25), reused 8 (delta 8), pack-reused 185 (from 1)[K
Receiving objects: 100% (220/220), 2.74 MiB | 27.21 MiB/s, done.
Resolving deltas: 100% (101/101), done.


In [None]:
# Switch the working directory to the cloned repository; presumably this is
# what lets the `data.*` and `models.*` imports resolve -- verify if moved.
%cd /content/ngcausality

/content/ngcausality


In [None]:
import os
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from data.synthetic import simulate_lorenz_96
from data.dream import generate_causal_matrix
from models.cmlp import cMLP, train_model_ista

In [None]:
# Root folder on Google Drive for every artefact produced by this experiment.
save_dir = '/content/drive/MyDrive/ngcausality_results/' + 'loren_f40_t1000_layer5/'

# exist_ok=True makes the cell idempotent and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(save_dir, exist_ok=True)

In [None]:
# For GPU acceleration
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Simulate data
# Draw the Lorenz-96 series (p=20, F=40, T=1000) together with its GC matrix.
X_np, GC = simulate_lorenz_96(p=20, F=40, T=1000)

# Add a leading axis of length 1 and move the data to `device` as float32.
X_batched = np.expand_dims(X_np, axis=0)
X = torch.tensor(X_batched, dtype=torch.float32, device=device)

In [None]:
# Save the simulated data to Google Drive
# Persist the simulated inputs on Google Drive alongside the other results.
x_np_path = os.path.join(save_dir, 'X_np.npy')
gc_path = os.path.join(save_dir, 'GC.npy')
x_tensor_path = os.path.join(save_dir, 'X_tensor.pt')
shapes_path = os.path.join(save_dir, 'data_shapes.txt')

np.save(x_np_path, X_np)    # raw simulated series
np.save(gc_path, GC)        # Granger causality matrix
torch.save(X, x_tensor_path)

# Small text record of the array shapes, one line per object.
shape_report = (
    f'Shape of X_np: {X_np.shape}\n'
    f'Shape of GC: {GC.shape}\n'
    f'Shape of X (torch tensor): {X.shape}\n'
)
with open(shapes_path, 'w') as f:
    f.write(shape_report)

In [None]:
# Plot data
# Overview figure: the full simulated run on the left, the first 50 samples
# on the right.
fig, axarr = plt.subplots(1, 2, figsize=(16, 5))
panels = (
    (X_np, 'Entire time series'),
    (X_np[:50], 'First 50 time points'),
)
for ax, (series, title) in zip(axarr, panels):
    ax.plot(series)
    ax.set_xlabel('T')
    ax.set_title(title)
plt.tight_layout()

# Write the figure to Google Drive as a PNG.
plot_filename = os.path.join(save_dir, 'data_plots.png')
plt.savefig(plot_filename)

# Close the figure so it is not rendered inline (drop this line to display it).
plt.close()

In [None]:
# Sweep the sparsity penalty `lam` over 1..20. For every value a fresh cMLP is
# trained with ISTA and the loss curve, a GC summary, the GC matrix figure and
# one lag figure per series are written to a per-lambda folder on Google Drive.
#
# Fixes relative to the previous version of this cell:
#   * the outer loop index is no longer shadowed by the inner plotting loops,
#     so every output file is named after the lambda it belongs to;
#   * the loss-curve x-axis uses the same step as `check_every`;
#   * the notebook-global `save_dir` is no longer overwritten by the loop.
RESULTS_ROOT = '/content/drive/MyDrive/ngcausality_results/' + 'loren_f40_t1000_layer5/'
MAX_LAG = 5        # lag passed to cMLP and number of rows in the lag figures
CHECK_EVERY = 100  # forwarded to train_model_ista as check_every

for lam in range(1, 21):
    lam_dir = RESULTS_ROOT + 'lam=' + str(lam) + '/'
    os.makedirs(lam_dir, exist_ok=True)

    # Set up model
    cmlp = cMLP(X.shape[-1], hidden=[10,10,10,10,10], lag=MAX_LAG).to(device=device)

    # Train with ISTA
    train_loss_list = train_model_ista(
        cmlp, X, lam=lam, lam_ridge=1e-6, lr=1e-5, penalty='H', max_iter=50000,
        check_every=CHECK_EVERY)

    # Loss function plot
    train_loss_np = [loss.cpu().detach().numpy() for loss in train_loss_list]
    # Assumes one loss value is recorded per CHECK_EVERY iterations, the first
    # at iteration CHECK_EVERY (consistent with the training log) -- TODO confirm.
    train_steps = CHECK_EVERY * np.arange(1, len(train_loss_np) + 1)
    plt.figure(figsize=(8, 5))
    plt.plot(train_steps, train_loss_np)
    plt.title('cMLP training')
    plt.ylabel('Loss')
    plt.xlabel('Training steps')
    plt.tight_layout()
    loss_plot_path = os.path.join(lam_dir, f'loss_plot_{lam}.png')
    plt.savefig(loss_plot_path)
    plt.close()  # keep the figure out of the cell output and free its memory

    # Verify learned Granger causality
    GC_est = cmlp.GC().cpu().data.numpy()
    n_series = len(GC_est)

    results_file_path = os.path.join(lam_dir, f'gc_results_{lam}.txt')
    with open(results_file_path, 'w') as f:
        f.write(f'True variable usage = {100 * np.mean(GC)}%\n')
        f.write(f'Estimated variable usage = {100 * np.mean(GC_est)}%\n')
        f.write(f'Accuracy = {100 * np.mean(GC == GC_est)}%\n')
        f.write(f'True positives = {np.sum((GC == 1) & (GC_est == 1))}\n')
        f.write(f'True negatives = {np.sum((GC == 0) & (GC_est == 0))}\n')
        f.write(f'False positives = {np.sum((GC == 0) & (GC_est == 1))}\n')
        f.write(f'False negatives = {np.sum((GC == 1) & (GC_est == 0))}\n')

    # Make figures for Granger causality matrices
    fig, axarr = plt.subplots(1, 2, figsize=(16, 5))
    axarr[0].imshow(GC, cmap='Blues')
    axarr[0].set_title('GC actual')
    axarr[0].set_ylabel('Affected series')
    axarr[0].set_xlabel('Causal series')
    axarr[0].set_xticks([])
    axarr[0].set_yticks([])

    axarr[1].imshow(GC_est, cmap='Blues', vmin=0, vmax=1, extent=(0, n_series, n_series, 0))
    axarr[1].set_title('GC estimated')
    axarr[1].set_ylabel('Affected series')
    axarr[1].set_xlabel('Causal series')
    axarr[1].set_xticks([])
    axarr[1].set_yticks([])

    # Mark disagreements with a red outline on the estimated panel.
    for row in range(n_series):
        for col in range(n_series):
            if GC[row, col] != GC_est[row, col]:
                # The -0.05 vertical offset is kept unchanged from the previous version.
                rect = plt.Rectangle((col, row-0.05), 1, 1, facecolor='none', edgecolor='red', linewidth=1)
                axarr[1].add_patch(rect)

    gc_plot_path = os.path.join(lam_dir, f'gc_plot_{lam}.png')
    plt.savefig(gc_plot_path)
    plt.close()

    # Verify lag selection
    # The per-lag estimate does not depend on the series, so compute it once.
    GC_est_all_lags = cmlp.GC(ignore_lag=False, threshold=False)
    for series in range(n_series):
        # Get true GC
        GC_lag = np.zeros((MAX_LAG, n_series))
        # NOTE(review): marks the first 3 lags of every true parent as active;
        # confirm this matches the simulated Lorenz-96 dynamics.
        GC_lag[:3, GC[series].astype(bool)] = 1.0

        # Get estimated GC
        GC_est_lag = GC_est_all_lags[series].cpu().data.numpy().T[::-1]

        # Make figures for lag-based GC
        fig, axarr = plt.subplots(1, 2, figsize=(16, 5))
        lag_panels = (
            (GC_lag, f'Series {series + 1} true GC'),
            (GC_est_lag, f'Series {series + 1} estimated GC'),
        )
        for ax, (lag_matrix, panel_title) in zip(axarr, lag_panels):
            ax.imshow(lag_matrix, cmap='Blues', extent=(0, n_series, MAX_LAG, 0))
            ax.set_title(panel_title)
            ax.set_ylabel('Lag')
            ax.set_xlabel('Series')
            ax.set_xticks(np.arange(n_series) + 0.5)
            ax.set_xticklabels(range(n_series))
            ax.set_yticks(np.arange(MAX_LAG) + 0.5)
            ax.set_yticklabels(range(1, MAX_LAG + 1))
            ax.tick_params(axis='both', length=0)

        # Mark nonzeros
        for col in range(n_series):
            for lag_row in range(MAX_LAG):
                if GC_est_lag[lag_row, col] > 0.0:
                    rect = plt.Rectangle((col, lag_row), 1, 1, facecolor='none', edgecolor='green', linewidth=1.0)
                    axarr[1].add_patch(rect)

        # File name carries the lambda and the (0-based) series index.
        lag_gc_plot_path = os.path.join(lam_dir, f'lag_gc_plot_{lam}_{series}.png')
        plt.savefig(lag_gc_plot_path)
        plt.close()


----------Iter = 100----------
Loss = 204.551559
Variable usage = 100.00%
----------Iter = 200----------
Loss = 202.003098
Variable usage = 100.00%
----------Iter = 300----------
Loss = 199.387512
Variable usage = 100.00%
----------Iter = 400----------
Loss = 196.657669
Variable usage = 100.00%
----------Iter = 500----------
Loss = 194.027924
Variable usage = 100.00%
----------Iter = 600----------
Loss = 191.308945
Variable usage = 100.00%
----------Iter = 700----------
Loss = 188.128464
Variable usage = 100.00%
----------Iter = 800----------
Loss = 184.263504
Variable usage = 100.00%
----------Iter = 900----------
Loss = 179.983978
Variable usage = 100.00%
----------Iter = 1000----------
Loss = 176.134491
Variable usage = 100.00%
----------Iter = 1100----------
Loss = 172.631134
Variable usage = 100.00%
----------Iter = 1200----------
Loss = 169.210434
Variable usage = 100.00%
----------Iter = 1300----------
Loss = 165.697876
Variable usage = 100.00%
----------Iter = 1400----------
Lo

NameError: name 'cdlinear' is not defined