# This notebook contains the scripts used to run the experiments and generate the data for the plots.

All experiment blocks are commented out by default to avoid re-running long computations or accidentally overwriting existing result files.  
To execute specific scans, simply uncomment the corresponding code sections.

This notebook contains the scripts used to generate experimental runs that produce the data for all plots.  
It includes parameter sweeps for different variables (such as $s$, $p$, $d$, $K$), and explores variations across sampling strategies or ground-truth settings.

## Table of Contents

- [Runs vs $s$](#runs-vs-s)
  - [Runs vs $s$ by $p$](#runs-vs-s-by-p)
  - [Runs vs $s$ by $k$](#runs-vs-s-by-k)
  - [Runs for $p \cdot k = \mathrm{const}$](#runs-for-p-cdot-k--const)
- [Runs vs $p$](#runs-vs-p)
  - [Runs for $p \cdot s = \mathrm{const}$](#runs-for-p-cdot-s--const)
  - [Runs for $p$ vs $d$](#runs-for-p-vs-d)
- [Runs for Strategies](#runs-for-strategies)
  - [Runs for strategies vs $s$](#runs-for-strategies-vs-s)
  - [Runs for strategies vs $p$](#runs-for-strategies-vs-p)
- [Runs for Ground Truth Analysis](#runs-for-ground-truth-analysis)
  - [Runs vs $p$](#runs-vs-p-1)
  - [Runs vs $d$](#runs-vs-d)


# Runs vs $s$

## Runs vs $s$ by $p$

In [None]:
import numpy as np
import torch
from structure import parameter_scan

# === Experiment settings (fixed for the whole scan) ===
n = m = 1000
d = 2
num_epochs, reps = 30, 5
lr = [1e-3]
wd = [5e-6, 5e-3]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === Sweep parameters ===
K = [1]
p = [0.1, 0.15, 0.2, 0.25, 0.35, 0.5]

# Sweep over s: three segments joined in this order (the result is not sorted).
# NOTE(review): s = 10 is the last point of the first segment and the first
# point of the third one, so it appears twice in the sweep.
s = np.concatenate((
    np.logspace(-1, 1, num=20),   # 20 points from 10^-1 to 10^1
    [1e-4, 1e-3, 1e-2],           # three hand-picked small values
    np.logspace(1, 2, num=10),    # 10 points from 10^1 to 10^2
))

# Output file handed to the scan as `save_path`
filename = "Data_3/scan_K1_fixedLR_varS_varP_full_4.pkl"

# === Launch scan (uncomment to run) ===
# results = parameter_scan(
#     n=n, m=m, d=d,
#     p=p, K=K, s=s,
#     lr=lr, weight_decay=wd,
#     num_epochs=num_epochs, reps=reps,
#     device=device,
#     linear=False,
#     save_path=filename, save_every=4,
#     soft_label=True
# )


## Runs vs $s$ by $k$

In [None]:
import numpy as np
import torch
from structure import parameter_scan

# === Fixed parameters ===
n = m = 1000
d = 2
num_epochs, reps = 30, 5
lr = 1e-3
p = 0.2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === Swept parameters ===
K = [1, 2, 4, 10, 50]
weight_decay = [1e-6, 5e-6, 1e-5, 5e-5, 1e-4, 5e-4, 1e-3]

# Sweep for s: log-spaced core, a few tiny values, then a log-spaced tail up to 10^3.
# The segments are joined in this order; the result is not sorted.
s = np.concatenate((
    np.logspace(-1, 1, num=20),
    [1e-4, 1e-3, 1e-2],
    np.logspace(1, 3, num=10),
))

# Output file handed to the scan as `save_path`
filename = "Data_final/scan_K_logspaceS_wdScan_p0.2_centered_soft_label_True_2.pkl"

# === Run the scan with soft labels (uncomment to run) ===
# results = parameter_scan(
#     n=n, m=m, d=d,
#     p=p, K=K, s=s,
#     lr=lr, weight_decay=weight_decay,
#     num_epochs=num_epochs, reps=reps,
#     device=device,
#     linear=False,
#     save_path=filename, save_every=4,
#     soft_label=True
# )


## Runs for $p \cdot k = \mathrm{const}$

In [None]:
import torch
import pickle
from structure import parameter_scan

# === General parameters ===
n = m = 1000
d = 2
s = [1.0, 3, 5, 8]
num_epochs = 30
reps = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === Target values for product p*K ===
target_constants = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 1]
possible_K = [1, 2, 3, 4, 5, 7, 10]
lr = 0.001

# === Generate (p, K) pairs approximating the target constants
# p is rounded to 5 decimals, so p*K only approximates the target.
# Comprehension variables are used on purpose: they do not leak into the
# notebook namespace, so the names `p` and `K` are not overwritten here.
candidate_pairs = [
    (round(target / k_val, 5), k_val)
    for target in target_constants
    for k_val in possible_K
]
# Keep only pairs whose p does not exceed 1
base_pairs = [pair for pair in candidate_pairs if pair[0] <= 1]
n_pairs = len(base_pairs)

# The three lists are aligned element by element (the scan is launched with
# linear=True below): each s value is held for one full pass over the
# (p, K) pairs, and the pair list is repeated once per s value.
s_values = [s_val for s_val in s for _ in range(n_pairs)]
p_values = [p_val for p_val, _ in base_pairs] * len(s)
K_values = [k_val for _, k_val in base_pairs] * len(s)

print(f"Total experiments: {len(p_values)}")
filename = "Data_final/scan_pK_constant_Final_s_wd_sweep.pkl"

# === Run experiments
# results = parameter_scan(
#     n=n,
#     m=m,
#     d=d,
#     p=p_values,
#     K=K_values,
#     lr=lr,
#     weight_decay=1e-5,
#     num_epochs=num_epochs,
#     reps=reps,
#     s=s_values,
#     device=device,
#     linear=True,
#     save_every=4,
#     save_path=filename,
#     soft_label=True
# )

# === Load and enrich results with p*K for grouping in plots
# with open(filename, "rb") as f:
#     results = pickle.load(f)

# for exp in results:
#     p = exp['params']['p']
#     K = exp['params']['K']
#     exp['params']['pxK'] = round(p * K, 4)

# === Save enriched results
# with open(filename, "wb") as f:
#     pickle.dump(results, f)
# print(f"✅ Results saved to {filename}")
# print(f"✅ Results saved to {filename}")


# Runs vs $p$

In [None]:
from structure import parameter_scan
from visualization import *

# numpy is used below; import it here so the cell does not depend on an
# earlier cell (or a star import) having already put `np` in scope.
import numpy as np

# === General parameters ===
n = m = 1000
d = 2
s = 5.0
num_epochs = 30
reps = 5
device = "cpu"

# === Swept parameters ===
K_values = [1, 2, 3, 5, 10]
p_values = np.concatenate([
    np.logspace(-2, np.log10(0.2), 20),  # 20 log-spaced points from 10^-2 to 0.2
    [1e-4, 5e-3, 1e-3, 0.5, 0.8]         # additional specific values (appended, not sorted)
])
lr = 0.001

# Output file handed to the scan as `save_path`
filename = "Data_final/scan_pK_Final.pkl"

# === Run experiments
# results = parameter_scan(
#     n=n,
#     m=m,
#     d=d,
#     p=p_values,
#     K=K_values,
#     lr=lr,
#     weight_decay=1e-5,
#     num_epochs=num_epochs,
#     reps=reps,
#     s=s,
#     device=device,
#     linear=False,
#     save_every=4,
#     save_path=filename,
#     soft_label=True
# )


## Runs for $p \cdot s = \mathrm{const}$

In [None]:
import torch
from structure import *
from visualization import *
from collections import Counter

# === Fixed parameters ===
n = m = 1000
d = 2
num_epochs = 30
reps = 5
K = 1
lr = [1e-3]
wd = 1e-5
device = "cpu"

# === Ranges for scanning s and p ===
# The values coming out of torch.arange(...).tolist() are rounded so that
# they sit on a clean decimal grid (3 decimals for s, 5 for p).
possible_s = [round(s_val, 3) for s_val in torch.arange(0.02, 10.1, 0.01).tolist()]
possible_p = [round(p_val, 5) for p_val in torch.arange(0.001, 0.301, 0.001).tolist()]
# Set copy for O(1) membership tests inside the nested loop below
allowed_p = set(possible_p)
# print(possible_s)

# === Target p*s products ===
target_constants = [0.5, 0.35, 0.25, 0.20, 0.12, 0.15]

# === Generate matching (p, s) pairs such that p * s ≈ constant ===
# For every target and every s on the grid, keep the pair only when the
# rounded p = target / s lands exactly on the p grid.
p_values = []
s_values = []
for target in target_constants:
    for s_val in possible_s:
        p_val = round(target / s_val, 5)
        if p_val in allowed_p:
            p_values.append(p_val)
            s_values.append(s_val)

# === Expand learning rates to match number of experiments ===
# Each learning rate is held for one full pass over the (p, s) pairs,
# and the pair lists are repeated once per learning rate.
n_pairs = len(p_values)
lr_values = [rate for rate in lr for _ in range(n_pairs)]
p_values = p_values * len(lr)
s_values = s_values * len(lr)

print("Generated (p, s) pairs such that p*s = constant:")
for p_val, s_val in zip(p_values, s_values):
    print(f"  p={p_val}, s={s_val}, p*s={round(p_val*s_val, 5)}")
print(len(p_values), len(s_values), len(lr_values))

# === Optional: count coverage per constant value
# coverage = Counter(round(p * s, 5) for p, s in zip(p_values, s_values))
# print("\nCoverage (number of values per p*s constant):")
# for k, v in sorted(coverage.items()):
#     print(f"  p*s={k:.5f} → {v} values")

filename = "Data_final/scan_ps_constant_Final.pkl"

# === Run parameter scan (synchronized linear scan)
# results = parameter_scan(
#     n=n, m=m, d=d,
#     p=p_values,
#     s=s_values,
#     lr=lr_values,
#     weight_decay=wd,
#     num_epochs=num_epochs,
#     reps=reps,
#     device=device,
#     K=K,
#     linear=True,
#     save_path=filename,
#     save_every=4,
#     soft_label=True
# )


## Runs for $p$ vs $d$

In [None]:
import torch
from structure import parameter_scan

# === Fixed hyperparameters ===
n = m = 1000
num_epochs = 30
s = 5
reps = 5  # the earlier duplicate `reps = 1` was dead code, overwritten by this value
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === Values to scan ===
p_values = [0.1, 0.2, 0.5, 0.8, 1.0]
d_values = list(range(2, 11, 2))  # 2, 4, 6, 8, 10
lr = 1e-3
wd = 1e-5
K = 1
filename = "Data_final/p_d_1.pkl"

# === Run parameter scan
# NOTE(review): the output path is passed as `save_path=` so this call matches
# every other parameter_scan call in this notebook (it was `filename=` here);
# confirm against the parameter_scan signature.
# results = parameter_scan(
#     n=n,
#     m=m,
#     d=d_values,
#     p=p_values,
#     lr=lr,
#     weight_decay=wd,
#     num_epochs=num_epochs,
#     reps=reps,
#     s=s,
#     device=device,
#     K=K,
#     save_path=filename,
#     save_every=4,
# )


# Runs for Strategies

## Runs for strategies vs $s$

In [None]:
import numpy as np
import torch
import os
from structure import parameter_scan

# === Setup ===
# Make sure the output directory exists before any scan tries to save into it.
os.makedirs("Data_strategies", exist_ok=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Strategy names, each passed to the scan as `strategy=...`.
# NOTE(review): "random" is not in this list although the strategies-vs-p cell
# includes it (and omits "cluster") — confirm this is intended.
strategies = [
    "proximity",
    "margin",
    "variance",
    "popularity",
    "top_k",
    "cluster",
    "svd",
]

# === Fixed parameters, forwarded to every scan via ** ===
default_params = dict(
    n=1000,
    m=1000,
    d=2,
    p=0.2,
    lr=1e-3,
    num_epochs=30,
    reps=3,
    K=1,
)

# === Values to scan ===
# s: log-spaced core, a few tiny values, then a log-spaced tail up to 10^4
# (joined in this order; the result is not sorted).
scan_s = np.concatenate((
    np.logspace(-1, 1, num=20),
    [1e-4, 1e-3, 1e-2],
    np.logspace(1, 4, num=10),
))
wd_sweep = [1e-6, 1e-5, 1e-4]

# === Launch experiments for each strategy (uncomment to run) ===
# for strategy in strategies:
#     filename = f"Data_strategies/run_vs_s_K1_{strategy}_wd_sweep.pkl"
#     results = parameter_scan(
#         **default_params,
#         s=scan_s,
#         weight_decay=wd_sweep,
#         strategy=strategy,
#         device=device,
#         linear=False,
#         save_path=filename,
#         save_every=5,
#         soft_label=False
#     )
#     print(f"✅ Saved: {filename}")


## Runs for strategies vs $p$

In [None]:
import numpy as np
import torch
import os
from structure import parameter_scan

# === Setup ===
# Make sure the output directory exists before any scan tries to save into it.
os.makedirs("Data_strategies", exist_ok=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Strategy names, each passed to the scan as `strategy=...`
strategies = [
    "random",
    "proximity",
    "margin",
    "variance",
    "popularity",
    "top_k",
    "svd",
]

# === Fixed parameters, forwarded to every scan via ** ===
default_params = dict(
    n=1000,
    m=1000,
    d=2,
    lr=1e-3,
    weight_decay=1e-5,
    num_epochs=30,
    reps=3,
    s=5,
    K=1,
)

# === Values to scan for p ===
# 20 log-spaced values from 0.01 to 0.2, rounded to 4 decimals and
# converted to a plain Python list.
p_grid = np.logspace(-2, np.log10(0.2), num=20)
p_list = np.round(p_grid, 4).tolist()

# === Launch parameter scans (uncomment to run) ===
# for strategy in strategies:
#     filename = f"Data_strategies/run_vs_p_{strategy}_2.pkl"
#     results = parameter_scan(
#         **default_params,
#         p=p_list,
#         strategy=strategy,
#         device=device,
#         linear=False,
#         save_path=filename,
#         save_every=5,
#         soft_label=True
#     )
#     print(f"✅ Saved: {filename}")


# Runs for Ground Truth Analysis

## Runs vs $p$

In [None]:
from structure import parameter_scan_ground_truth
import numpy as np
import pickle

# === Experiment configuration ===
n = m = 1000
d = 2                                   # latent dimension
s = 5                                   # scaling factor
p = np.logspace(-4, 0, num=30)          # 30 log-spaced values from 1e-4 to 1.0
K = [10, 1]
reps = 5
device = "cpu"

# === Run the experiment (uncomment to re-run)
# results = parameter_scan_ground_truth(
#     n=n, m=m, d=d, p=p, s=s, device=device,
#     K=K, reps=reps, linear=False, 
# )

# === Save results to Data_final/ with a clear filename
# (the Data_final/ directory is not created here; it must already exist)
# with open("Data_final/gt_scan_s5_Ksweep_pSweep_n1000.pkl", "wb") as f:
#     pickle.dump(results, f)
# print("✅ Results saved to Data_final/gt_scan_s5_Ksweep_pSweep_n1000.pkl")


## Runs vs $d$

In [None]:
from structure import parameter_scan_ground_truth
import numpy as np
import matplotlib.pyplot as plt

# === Fixed configuration ===
n = m = 1000
p = 0.5
K = 1
reps = 3
device = "cpu"

# === Swept values ===
s = [1, 3, 9]
d = list(range(1, 8))  # 1 through 7

# === Run the scan (uncomment to re-run)
# results = parameter_scan_ground_truth(
#     n=n, m=m, p=p, d=d, s=s, device=device,
#     K=K, linear=False, reps=reps
# )

# === Save the results
# (the Data_final/ directory is not created here; it must already exist)
# import pickle
# with open("Data_final/scan_d_s_gt.pkl", "wb") as f:
#     pickle.dump(results, f)
