In [None]:
import os

REPO = "SolarFlareZ/aml-project"
BRANCH = "main"
PROJECT_DIR = "/content/aml-project"

if os.path.exists(PROJECT_DIR):
    print("repo exists, pulling last commit")
    %cd {PROJECT_DIR}
    !git checkout {BRANCH}
    !git pull origin {BRANCH}
else:
    print("cloning repo...")
    !git clone -b {BRANCH} https://github.com/{REPO}.git
    %cd {PROJECT_DIR}

!pip install -q -r requirements.txt

print(f"\curr dir: {os.getcwd()}")

In [None]:
!python -m src.train \
    trainer.max_epochs=5 \
    experiment_name=quick_test \
    callbacks.early_stopping.patience=5

In [None]:
# Optional hyperparameter search — intentionally disabled; uncomment to run it.
# !python -m src.optimize hp_search.n_trials=12

In [None]:
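# Optional — disabled together with the search above: read the search results CSV
# and keep the trial with the highest `value` as BEST_LR / BEST_MOMENTUM / BEST_BATCH_SIZE.
# import pandas as pd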

# RESULTS_DIR = f"{PROJECT_DIR}/results"

# df = pd.read_csv(f"{RESULTS_DIR}/optuna_results.csv")
# best_trial = df.loc[df['value'].idxmax()]

# BEST_LR = best_trial['params_lr']
# BEST_MOMENTUM = best_trial['params_momentum']
# BEST_BATCH_SIZE = int(best_trial['params_batch_size'])

# print("best hparams:")
# print(f"    lr: {BEST_LR:.6f}")
# print(f"    momentum: {BEST_MOMENTUM:.4f}")
# print(f"    batch_size: {BEST_BATCH_SIZE}")
# print(f"    val_acc: {best_trial['value']:.4f}")

In [None]:
# Train the centralized baseline for 100 epochs with fixed hyperparameters.
#
# To use the values found by the (currently disabled) hyperparameter search,
# replace the first three overrides with:
#     f"optimizer.lr={BEST_LR}",
#     f"optimizer.momentum={BEST_MOMENTUM}",
#     f"data.batch_size={BEST_BATCH_SIZE}",
baseline_overrides = [
    "optimizer.lr=0.01",
    "optimizer.momentum=0.9",
    "data.batch_size=128",
    "trainer.max_epochs=100",
    "experiment_name=centralized_baseline",
]
# Joining a list guarantees exactly one space between arguments.
baseline_command = " ".join(["python", "-m", "src.train", *baseline_overrides])

# NOTE(review): output of os.system may not show up in the notebook cell; the
# quick-test cell uses `!python`, which streams it — consider switching.
baseline_status = os.system(baseline_command)
if baseline_status != 0:
    # Stop here so later cells do not plot stale or missing metrics.
    raise RuntimeError(
        f"training failed (os.system status {baseline_status}): {baseline_command}"
    )

In [None]:
# Only run if Cell 5 was interrupted
#
# Re-launches the baseline training with the same overrides plus `resume_from`
# pointing at the last saved checkpoint.
#
# To use the values found by the (currently disabled) hyperparameter search,
# replace the first three overrides with:
#     f"optimizer.lr={BEST_LR}",
#     f"optimizer.momentum={BEST_MOMENTUM}",
#     f"data.batch_size={BEST_BATCH_SIZE}",
resume_overrides = [
    "optimizer.lr=0.01",
    "optimizer.momentum=0.9",
    "data.batch_size=128",
    "trainer.max_epochs=100",
    "experiment_name=centralized_baseline",
    "resume_from=./results/checkpoints/last.ckpt",
]
# Joining a list guarantees exactly one space between arguments. The previous
# hand-concatenated string lacked the space before `resume_from`, which fused
# it into the experiment name and dropped the checkpoint argument.
resume_command = " ".join(["python", "-m", "src.train", *resume_overrides])

resume_status = os.system(resume_command)
if resume_status != 0:
    # Stop here so later cells do not plot stale or missing metrics.
    raise RuntimeError(
        f"resumed training failed (os.system status {resume_status}): {resume_command}"
    )

In [None]:
# Plot loss and accuracy curves for the most recent logged run of the baseline
# experiment, save the figure, and print a short numeric summary.
# (Author's note: the plotting code was AI generated — review is welcome.)
import re

import pandas as pd
import matplotlib.pyplot as plt

LOG_DIR = f"{PROJECT_DIR}/results/logs"
RESULTS_DIR = f"{PROJECT_DIR}/results"

experiment_name = "centralized_baseline"
experiment_dir = f"{LOG_DIR}/{experiment_name}"


def _version_sort_key(name):
    """Sort run directories by trailing number so that e.g. version_2 < version_10.

    Names without a trailing number sort first, ordered alphabetically.
    NOTE(review): assumes run directories end in an integer (e.g. version_N) — confirm.
    """
    match = re.search(r"(\d+)$", name)
    return (int(match.group(1)) if match else -1, name)


# Only consider runs that actually produced a metrics file. A plain sorted()
# is lexicographic and would rank "version_9" after "version_10".
log_versions = sorted(
    (
        entry
        for entry in os.listdir(experiment_dir)
        if os.path.isfile(f"{experiment_dir}/{entry}/metrics.csv")
    ),
    key=_version_sort_key,
)
if not log_versions:
    raise FileNotFoundError(f"No metrics.csv found under {experiment_dir}")
latest_version = log_versions[-1]
metrics_path = f"{experiment_dir}/{latest_version}/metrics.csv"

print(f"Loading: {metrics_path}")
metrics = pd.read_csv(metrics_path)

# Separate metrics: each kind is only populated on some rows of the CSV.
train_metrics = metrics[metrics['train_loss'].notna()].copy()
val_metrics = metrics[metrics['val_loss'].notna()].copy()
if 'test_acc' in metrics.columns:
    test_metrics = metrics[metrics['test_acc'].notna()].copy()
else:
    # No test results were logged: keep an empty frame so the
    # `len(test_metrics) > 0` checks below (and in later cells) still work.
    test_metrics = metrics.iloc[0:0].copy()

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# One panel per metric: (axis, column suffix, y-axis label, title).
panels = [
    (axes[0], 'loss', 'Loss', 'Loss Curves'),
    (axes[1], 'acc', 'Accuracy', 'Accuracy Curves'),
]
for ax, suffix, ylabel, title in panels:
    ax.plot(train_metrics['epoch'], train_metrics[f'train_{suffix}'], label='Train', marker='.')
    ax.plot(val_metrics['epoch'], val_metrics[f'val_{suffix}'], label='Val', marker='.')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(alpha=0.3)

fig.tight_layout()
fig.savefig(f"{RESULTS_DIR}/training_curves.png", dpi=150)
plt.show()

print("\n✓ Results:")
print(f"  Final train_acc: {train_metrics['train_acc'].iloc[-1]:.4f}")
print(f"  Final val_acc:   {val_metrics['val_acc'].iloc[-1]:.4f}")
print(f"  Best val_acc:    {val_metrics['val_acc'].max():.4f}")
if len(test_metrics) > 0:
    print(f"  Test acc:        {test_metrics['test_acc'].iloc[0]:.4f}")

In [None]:
from IPython.display import IFrame

# Embed the saved HTML reports inline, one frame per report.
# NOTE(review): presumably these files are written by the src.optimize search,
# which is commented out earlier in this notebook — confirm they exist.
report_frames = (
    ("optimization history:", "results/optimization_history.html"),
    ("\nparam Importances:", "results/param_importances.html"),
)
for heading, report_path in report_frames:
    print(heading)
    display(IFrame(src=report_path, width=800, height=400))

In [None]:
from google.colab import drive
drive.mount('/content/drive')

DRIVE_DIR = "/content/drive/MyDrive/aml-project-results"
os.makedirs(DRIVE_DIR, exist_ok=True)

!cp -r {RESULTS_DIR}/* {DRIVE_DIR}/
print(f"Saved to {DRIVE_DIR}")


In [None]:
# Final text summary of the centralized baseline run.
# Relies on `val_metrics` and `test_metrics` created by the plotting cell.
banner_rule = "=" * 60
section_rule = "-" * 60

print(banner_rule)
print("CENTRALIZED BASELINE")
print(banner_rule)

print("\nBEST HYPERPARAMETERS")
print(section_rule)
# Disabled together with the hyperparameter search; re-enable once BEST_* exist.
# print(f"lr: {BEST_LR}")
# print(f"momentum: {BEST_MOMENTUM}")
# print(f"batch size: {BEST_BATCH_SIZE}")

# The heading emoji was stored as mis-decoded bytes; restored to the intended character.
print("\n📈 RESULTS")
print(section_rule)
print(f"best val_acc: {val_metrics['val_acc'].max():.4f}")
if len(test_metrics) > 0:
    print(f"test acc: {test_metrics['test_acc'].iloc[0]:.4f}")