In [None]:
import matplotlib.pyplot as plt
import json
import numpy as np 
from matplotlib.font_manager import FontProperties

# try:
#     plt.rcParams["text.usetex"] = True
#     plt.rcParams["text.latex.preamble"] = r"\usepackage{amsmath}"
# except Exception:
#     print("Warning: LaTeX not found, using default text rendering.")
#     plt.rcParams["text.usetex"] = False

# Global figure styling: serif text everywhere, plus a bold 16pt
# Times New Roman font object reused for axis labels in the plotting cells.
plt.rcParams.update({"font.family": "serif"})
bold_times = FontProperties(
    family='Times New Roman',
    weight='bold',
    size=16,
)

## Plot Out-of-Sample figure

In [None]:
# Out-of-sample figure: mean cost with a 10%-90% band on the left axis and
# mean reliability ("coverage") on a twin right axis, both plotted against 1 - alpha.
save_path = "results/240/results.json"
with open(save_path, "r") as f:
    loaded_results = json.load(f)

# JSON object keys are strings; convert them back to numeric alpha values.
results = {float(alpha): vals for alpha, vals in loaded_results.items()}

alpha_list = sorted(results.keys())
x_vals = [1 - a for a in alpha_list]

cost_matrix = np.array([results[a]['costs'] for a in alpha_list])  # shape: (len_alpha, K)
print(cost_matrix.shape)
cost_matrix = cost_matrix.T  # shape: (K, len_alpha)

# Per-alpha statistics over the K recorded costs.
mean_costs = np.mean(cost_matrix, axis=0)
print(mean_costs.shape)
lower_quantile = np.quantile(cost_matrix, 0.1, axis=0)
upper_quantile = np.quantile(cost_matrix, 0.9, axis=0)

coverage = [np.mean(results[a]['reliabilities']) for a in alpha_list]

# Position of the alpha with the lowest mean cost (highlighted with a star below).
best_idx = np.argmin(mean_costs)
best_x = x_vals[best_idx]
best_y = mean_costs[best_idx]

fig, ax1 = plt.subplots(figsize=(8, 6))
ax1.plot(x_vals, mean_costs, 'b-', label='Average Loss')
ax1.fill_between(x_vals, lower_quantile, upper_quantile, alpha=0.35, color='blue')

# === mark outliers ===
# "Outliers" here are the individual costs falling outside the 10%-90% band.
for j, a in enumerate(alpha_list):
    q10, q90 = lower_quantile[j], upper_quantile[j]
    outliers = [c for c in results[a]['costs'] if (c < q10 or c > q90)]
    if outliers:
        ax1.scatter([x_vals[j]]*len(outliers), outliers,
                    color='purple', s=20, alpha=0.6, label='_nolegend_')
# === mark optimal solution ===
ax1.plot(best_x, best_y, marker='*', markersize=20,
        markeredgecolor='black', markeredgewidth=2,
        color='magenta', label='Lowest Mean', linestyle='None')
ax1.set_yticks([-4500, -4000, -3500, -3000, -2500, -2000])
ax1.set_xticks(x_vals)

# The tick at 1 - alpha == 0 is labelled 'SAA' instead of '0.00'.
# The lookup tolerates floating-point round-off and a results file that does
# not contain alpha == 1 (the original list.index(0.0) raised ValueError then).
index_of_zero = next((i for i, x in enumerate(x_vals) if np.isclose(x, 0.0)), None)
str_nums = [f"{x:.2f}" for x in x_vals]
if index_of_zero is not None:
    str_nums[index_of_zero] = 'SAA'
ax1.set_xticklabels(str_nums, rotation=90)

ax1.annotate(f'{best_y:.2f}', xy=(best_x, best_y), xytext=(-30, 15),textcoords='offset points',
            color='black', fontsize=16)
ax1.set_xlabel(r'$1-\alpha$', fontproperties=bold_times)
ax1.set_ylabel("Out-of-sample performance", color='blue', fontproperties=bold_times)

ax1.tick_params(axis='y', labelcolor='blue')

ax1.grid(which='major', alpha=0.4)
ax1.grid(which='minor', alpha=0.2)

# Set the horizontal grid to blue
ax1.yaxis.grid(True, color='blue', linestyle='-', linewidth=0.1)

# Second y-axis sharing the same x-axis, used for the coverage curve.
ax2 = ax1.twinx()
ax2.plot(x_vals, coverage, 'r--', label='Coverage Probability')
ax2.set_yticks([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
ax2.set_ylabel('Coverage Probability', color='red', fontproperties=bold_times)
ax2.tick_params(axis='y', labelcolor='red')

fig.tight_layout()
plt.show()

## Plot time figure

In [None]:
# Timing figure: one line per (method, data size), runtime against bootstrap size.
# The loaded JSON is indexed as time_list[method][data_size][bootstrap_size] -> time.
# NOTE(review): time_path is empty in the notebook; point it at the timing
# results JSON before running this cell (open('') raises FileNotFoundError).
time_path = ''
with open(time_path, "r") as f:
    time_list = json.load(f)

# Line colour per data size N; sizes not listed here fall back to gray.
color_map = {
    120: 'red',
    240: 'blue',
    480: 'green'
}

linestyle_map = {
    'extensive form': '-',  # solid line
    'ours': '--',  # dash line
}


def _to_number(key):
    """Convert a JSON object key (always a string) to an int, or a float if needed."""
    try:
        return int(key)
    except (TypeError, ValueError):
        return float(key)


for method in time_list:
    for data_size in time_list[method]:
        m_dict = time_list[method][data_size]
        # JSON keys are strings: convert before sorting so the order is numeric
        # ("200" before "1000") and the x-axis is a numeric, not categorical, axis.
        points = sorted(
            ((_to_number(m), t) for m, t in m_dict.items()),
            key=lambda point: point[0],
        )
        m_sizes = [m for m, _ in points]
        times = [t for _, t in points]
        label = f"{method}, N={data_size}"
        plt.plot(
            m_sizes,
            times,
            label=label,
            # color_map has integer keys, so look up the numeric data size;
            # the raw string key never matched and every line came out gray.
            color=color_map.get(_to_number(data_size), 'gray'),
            linestyle=linestyle_map.get(method, '-')
        )

plt.xlabel('bootstrap size', fontproperties=bold_times)
plt.ylabel('Time (s)', fontproperties=bold_times)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [10]:
from params_generator import ParametersGenerator
from utils import sample_from_config
import os, pickle 

# Draw one test sample set and a series of training sample sets from the
# configuration file, then pickle them together for later experiments.
cfg_path = "config.yaml"
n_train_epochs = 200  # number of independent training sample sets to draw
samples_path = "samples/samples2.pkl"  # single source for the output location

pg = ParametersGenerator()
# The test set is generated first, then the training sets, as in the original cell.
test_samples = pg.generate_parameters(samples=sample_from_config(cfg_path, train=False))
train_samples_list = []
for _ in range(n_train_epochs):
    train_samples = pg.generate_parameters(samples=sample_from_config(cfg_path, train=True))
    train_samples_list.append(train_samples)

sample_data = {
    'train_samples': train_samples_list,
    'test_samples': test_samples
}

# Create the output directory if it does not exist yet.
os.makedirs(os.path.dirname(samples_path), exist_ok=True)
with open(samples_path, "wb") as f:
    pickle.dump(sample_data, f)
print(f"Samples saved to {samples_path}")

Samples saved to samples/samples2.pkl


## Merged Samples

In [11]:
import pickle
import numpy as np

def _load_pickle(path):
    """Read and return the object stored in the pickle file at `path`."""
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# First and second sample files to be merged epoch by epoch below.
data1 = _load_pickle("samples/480/data.pkl")
data2 = _load_pickle("samples/e200_480.pkl")

def merge_epoch_data(epoch1, epoch2):
    """Combine two per-epoch sample dicts into a new dict.

    'T' is taken unchanged from `epoch1`. Each of 'h', 'W' and 'q' that is
    present in both inputs is concatenated along axis 0 (epoch1 first, then
    epoch2); a key missing from either input is left out of the result.

    Raises:
        KeyError: if `epoch1` has no 'T' entry.
    """
    # 'T' is not stacked: the entry from the first epoch is reused as-is.
    combined = {'T': epoch1['T']}
    shared_names = (name for name in ('h', 'W', 'q')
                    if name in epoch1 and name in epoch2)
    for name in shared_names:
        combined[name] = np.concatenate((epoch1[name], epoch2[name]), axis=0)
    return combined

# Merge the training samples of the two loaded files epoch by epoch.
# zip() stops at the shorter list, so warn instead of silently dropping epochs.
if len(data1['train_samples']) != len(data2['train_samples']):
    print(
        "Warning: epoch counts differ "
        f"({len(data1['train_samples'])} vs {len(data2['train_samples'])}); "
        "extra epochs of the longer file are not merged"
    )

merged_train_samples = []
for epoch1, epoch2 in zip(data1['train_samples'], data2['train_samples']):
    merged_epoch = merge_epoch_data(epoch1, epoch2)
    merged_train_samples.append(merged_epoch)

# Merged dataset: combined training epochs, test samples taken from the first file.
merged_data = {
    'train_samples': merged_train_samples,
    'test_samples': data1['test_samples']
}

# Save the merged data
merged_path = "samples/merged_samples.pkl"
with open(merged_path, "wb") as f:
    pickle.dump(merged_data, f)

# Report the real output location; the previous message named files
# (samples1.pkl / samples2.pkl) that this notebook does not load.
print(f"Successfully merged train samples from both files into {merged_path}")

# Print some info about the merged data
print(f"Number of epochs in merged data: {len(merged_data['train_samples'])}")
if len(merged_data['train_samples']) > 0:
    print("Samples per epoch:")
    for key in ['h', 'T','W','q']:
        if key in merged_data['train_samples'][0]:
            print(f"  {key}: {merged_data['train_samples'][0][key].shape}")
print("\nTest samples:")
for key in ['h','T', 'W','q']:
    if key in merged_data['test_samples']:
        print(f"  {key}: {merged_data['test_samples'][key].shape}")

Successfully merged samples1.pkl and samples2.pkl into merged_samples.pkl
Number of epochs in merged data: 200
Samples per epoch:
  h: (960, 9)
  T: (9, 20)
  W: (960, 9, 24)
  q: (960, 24)

Test samples:
  h: (2000, 9)
  T: (9, 20)
  W: (2000, 9, 24)
  q: (2000, 24)


## Merged Epochs

In [None]:
import pickle
import numpy as np

# Build one dataset from the leading epochs of the first file plus all
# epochs of the second file, then save it and print a short summary.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.

# First file: expected to contribute the first 140 epochs.
with open("samples/merged_samples.pkl", "rb") as f:
    data1 = pickle.load(f)

# Second file: all of its epochs are used (expected to be 60).
with open("results/240/data.pkl", "rb") as f:
    data2 = pickle.load(f)

target_epochs = 200          # intended size of the merged training set
epochs_from_second = 60      # epochs the second file is expected to hold
epochs_from_first = target_epochs - epochs_from_second

# Combine train samples from both files
merged_train_samples = data1['train_samples'][:epochs_from_first] + data2['train_samples']

# Create merged data structure; test samples are kept from the second file.
merged_data = {
    'train_samples': merged_train_samples,
    'test_samples': data2['test_samples']
}

# Save the merged data
output_path = "samples/e200_240.pkl"
with open(output_path, "wb") as f:
    pickle.dump(merged_data, f)

# Report the actual epoch count instead of a hard-coded "200", and flag a
# mismatch (e.g. the second file not holding exactly 60 epochs).
n_merged = len(merged_data['train_samples'])
print(f"Successfully created merged file with {n_merged} epochs at {output_path}")
print(f"Number of epochs in merged data: {n_merged}")
if n_merged != target_epochs:
    print(f"Warning: expected {target_epochs} epochs but merged {n_merged}")

# Print sample information from first epoch
if n_merged > 0:
    print("\nFirst epoch sample sizes:")
    first_epoch = merged_data['train_samples'][0]
    for key in first_epoch.keys():
        if isinstance(first_epoch[key], np.ndarray):
            print(f"  {key}: {first_epoch[key].shape}")

print("\nTest samples sizes:")
for key in merged_data['test_samples'].keys():
    if isinstance(merged_data['test_samples'][key], np.ndarray):
        print(f"  {key}: {merged_data['test_samples'][key].shape}")
