In [5]:
import json
import glob
import re
import pandas as pd

# Get files
NOTEBOOK_GLOB = "test_3_MNIST_edmd_dmps_*.ipynb"

# Numeric literal as it may appear on the right-hand side of an assignment:
# 1, 1.0, .5, 1e-3, -2.5E+4, ...
FLOAT_RE = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'


def notebook_sort_key(path):
    """Sort key that orders notebooks by the integer after the last underscore.

    Paths whose suffix is not an integer are placed after all numbered ones
    (ordered by path) instead of aborting the sort for every file.
    """
    suffix = path.split('_')[-1].split('.')[0]
    try:
        return (0, int(suffix), path)
    except ValueError:
        return (1, 0, path)


def notebook_name(path):
    """Return the last path component, accepting both '/' and '\\' separators."""
    return re.split(r'[\\/]', path)[-1]


def read_code_text(path):
    """Return the source of every code cell in the notebook, newline-separated.

    Raises:
        OSError: the file cannot be opened.
        ValueError: the file is not valid JSON/UTF-8, or its top-level JSON
            value is not an object.
    """
    with open(path, 'r', encoding='utf-8-sig') as f:
        nb = json.load(f)
    if not isinstance(nb, dict):
        raise ValueError("top-level JSON value is not a notebook object")

    pieces = []
    # A notebook without a 'cells' key simply contributes no code.
    for cell in nb.get('cells', []):
        if cell.get('cell_type') != 'code':
            continue
        source = cell.get('source', '')
        # 'source' is either a single string or a list of line strings.
        pieces.append(source if isinstance(source, str) else "".join(source))
    return "".join(piece + "\n" for piece in pieces)


def int_assignment(var_name):
    """Build an extractor for the first line-initial ``var_name = <digits>``."""
    pattern = re.compile(rf'^{re.escape(var_name)}\s*=\s*(\d+)', re.MULTILINE)

    def extract(text):
        found = pattern.search(text)
        return int(found.group(1)) if found else None

    return extract


def float_assignment(var_name):
    """Build an extractor for the first line-initial ``var_name = <number>``."""
    pattern = re.compile(
        rf'^{re.escape(var_name)}\s*=\s*({FLOAT_RE})', re.MULTILINE
    )

    def extract(text):
        found = pattern.search(text)
        return float(found.group(1)) if found else None

    return extract


def find_autoencoder_type(text):
    """Return "CNN" or "MLP" depending on which autoencoder line is uncommented.

    "CNN" wins when both assignments appear at the start of a line.
    """
    for kind in ("CNN", "MLP"):
        if re.search(rf'^autoencoder\s*=\s*{kind}Autoencoder', text, re.MULTILINE):
            return kind
    return None


def collect_notebook_params(paths, extractors):
    """Build one row (dict) per readable notebook.

    Args:
        paths: notebook file paths, processed in the given order.
        extractors: mapping of output column name -> callable(text) that
            returns the extracted value or None.

    Returns:
        List of dicts with a "filename" key followed by one key per
        extractor. Unreadable notebooks are reported with print() and skipped.
    """
    rows = []
    for path in paths:
        try:
            text = read_code_text(path)
        except (OSError, ValueError) as e:
            print(f"Error reading {path}: {e}")
            continue
        row = {"filename": notebook_name(path)}
        for column, extract in extractors.items():
            row[column] = extract(text)
        rows.append(row)
    return rows


# Output column -> how to pull its value out of the notebook code.
DM_COLUMNS = {
    "latent_dim": int_assignment("latent_dim"),
    "num_epochs": int_assignment("num_epochs"),
    "max_dm_samples": int_assignment("max_dm_samples"),
    "batch_size": int_assignment("batch_size"),
    "dm_m": int_assignment("m"),  # m (DM eigenvectors)
    "spread": float_assignment("SPREAD_FACTOR"),
    "AE_Type": find_autoencoder_type,
}

files = sorted(glob.glob(NOTEBOOK_GLOB), key=notebook_sort_key)
data = collect_notebook_params(files, DM_COLUMNS)

df = pd.DataFrame(data)
print(df.to_string())


                         filename  latent_dim  num_epochs  max_dm_samples  batch_size  dm_m  spread AE_Type
0  test_3_MNIST_edmd_dmps_0.ipynb           6          60           20000         128    15     1.0     CNN
1  test_3_MNIST_edmd_dmps_1.ipynb           6         100            5000         128    15     1.0     CNN
2  test_3_MNIST_edmd_dmps_2.ipynb           8         100            5000         128    15     1.0     CNN
3  test_3_MNIST_edmd_dmps_3.ipynb           8         100            5000         128    15     1.0     CNN
4  test_3_MNIST_edmd_dmps_4.ipynb           6         100           20000         128    15     1.0     CNN
5  test_3_MNIST_edmd_dmps_5.ipynb           6          60           20000         128    15     1.0     CNN


In [3]:
import json
import glob
import re
import pandas as pd

NOTEBOOK_GLOB = "test_3_MNIST_edmd_dmps_*.ipynb"

# Numeric literal as it may appear on the right-hand side of an assignment:
# 1, 1.0, .5, 1e-3, -2.5E+4, ...
FLOAT_RE = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'


def notebook_sort_key(path):
    """Sort key that orders notebooks by the integer after the last underscore.

    Paths whose suffix is not an integer are placed after all numbered ones
    (ordered by path) instead of aborting the sort for every file.
    """
    suffix = path.split('_')[-1].split('.')[0]
    try:
        return (0, int(suffix), path)
    except ValueError:
        return (1, 0, path)


def notebook_name(path):
    """Return the last path component, accepting both '/' and '\\' separators."""
    return re.split(r'[\\/]', path)[-1]


def read_code_text(path):
    """Return the source of every code cell in the notebook, newline-separated.

    Raises:
        OSError: the file cannot be opened.
        ValueError: the file is not valid JSON/UTF-8, or its top-level JSON
            value is not an object.
    """
    with open(path, 'r', encoding='utf-8-sig') as f:
        nb = json.load(f)
    if not isinstance(nb, dict):
        raise ValueError("top-level JSON value is not a notebook object")

    pieces = []
    # A notebook without a 'cells' key simply contributes no code.
    for cell in nb.get('cells', []):
        if cell.get('cell_type') != 'code':
            continue
        source = cell.get('source', '')
        # 'source' is either a single string or a list of line strings.
        pieces.append(source if isinstance(source, str) else "".join(source))
    return "".join(piece + "\n" for piece in pieces)


def int_assignment(var_name):
    """Build an extractor for the first line-initial ``var_name = <digits>``."""
    pattern = re.compile(rf'^{re.escape(var_name)}\s*=\s*(\d+)', re.MULTILINE)

    def extract(text):
        found = pattern.search(text)
        return int(found.group(1)) if found else None

    return extract


def float_assignment(var_name):
    """Build an extractor for the first line-initial ``var_name = <number>``."""
    pattern = re.compile(
        rf'^{re.escape(var_name)}\s*=\s*({FLOAT_RE})', re.MULTILINE
    )

    def extract(text):
        found = pattern.search(text)
        return float(found.group(1)) if found else None

    return extract


def find_seed(text):
    """Return the integer passed to the first ``np.random.seed(<digits>)`` call.

    The search is not anchored to the start of a line, so a call inside a
    comment is matched as well.
    """
    found = re.search(r'np\.random\.seed\((\d+)\)', text)
    return int(found.group(1)) if found else None


def collect_notebook_params(paths, extractors):
    """Build one row (dict) per readable notebook.

    Args:
        paths: notebook file paths, processed in the given order.
        extractors: mapping of output column name -> callable(text) that
            returns the extracted value or None.

    Returns:
        List of dicts with a "filename" key followed by one key per
        extractor. Unreadable notebooks are reported with print() and skipped.
    """
    rows = []
    for path in paths:
        try:
            text = read_code_text(path)
        except (OSError, ValueError) as e:
            print(f"Error reading {path}: {e}")
            continue
        row = {"filename": notebook_name(path)}
        for column, extract in extractors.items():
            row[column] = extract(text)
        rows.append(row)
    return rows


# Output column -> how to pull its value out of the notebook code.
# Only values that end up in the table are extracted.
PARTICLE_COLUMNS = {
    "latent_dim": int_assignment("latent_dim"),
    "epochs": int_assignment("num_epochs"),
    "dm_samples": int_assignment("max_dm_samples"),
    "particles": int_assignment("m_particles"),
    "step": float_assignment("step_size"),
    "n_dict": int_assignment("n_dict_components"),
    "seed": find_seed,  # random seed
}

files = sorted(glob.glob(NOTEBOOK_GLOB), key=notebook_sort_key)
data = collect_notebook_params(files, PARTICLE_COLUMNS)

df = pd.DataFrame(data)
print(df.to_string())


                           filename  latent_dim  epochs  dm_samples  particles  step  n_dict  seed
0    test_3_MNIST_edmd_dmps_0.ipynb           6      60       20000       64.0   0.1     100     1
1    test_3_MNIST_edmd_dmps_1.ipynb           6     100        5000       64.0   0.2     200     1
2    test_3_MNIST_edmd_dmps_2.ipynb           8     100        5000       64.0   0.2     200     1
3    test_3_MNIST_edmd_dmps_3.ipynb           8      60        5000        NaN   NaN     300     1
4    test_3_MNIST_edmd_dmps_4.ipynb           8     100        5000       64.0   0.2     200     1
5    test_3_MNIST_edmd_dmps_5.ipynb           8      60        5000        NaN   NaN     300     1
6    test_3_MNIST_edmd_dmps_6.ipynb           8     100        5000       64.0   0.2     200     1
7    test_3_MNIST_edmd_dmps_7.ipynb           8     100        5000       64.0   0.2     200     1
8    test_3_MNIST_edmd_dmps_8.ipynb           6     100       20000       64.0   0.2     200     1
9    test_