In [1]:
import os
import sys
import torch
import torch_geometric
from pathlib import Path
from model import GNNPolicy
from data_type import GraphDataset
from utils import process

%load_ext autoreload
%autoreload 2


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/data/conda_envs/envs/l2sn/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/data/conda_envs/envs/l2sn/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/data/conda_envs/envs/l2sn/lib/python3.9/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/data/conda_envs/envs/l2sn/lib/python3.9/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start

In [2]:
problem = 'FCMCNF'
lr = 0.005
n_epoch = 5
patience = 10
early_stopping = 20
normalize = True
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_train = 1
batch_valid  = 256

loss_fn = torch.nn.BCELoss()
optimizer_fn = torch.optim.Adam

In [3]:
train_losses = []
valid_losses = []
train_accs = []
valid_accs = []

train_files = [ str(path) for path in Path(os.path.join(os.path.abspath(''), 
                                                        f"../node_selection/data/{problem}/train")).glob("*.pt") ]

valid_files = [ str(path) for path in Path(os.path.join(os.path.abspath(''), 
                                                        f"../node_selection/data/{problem}/valid")).glob("*.pt") ]


train_files += valid_files[:7000]
valid_files = valid_files[7000:]

train_data = GraphDataset(train_files)
valid_data = GraphDataset(valid_files)

#inspect(train_data[:100])

# TO DO : learn something from the data
train_loader = torch_geometric.loader.DataLoader(train_data, 
                                                 batch_size=batch_train, 
                                                 shuffle=True, 
                                                 follow_batch=['constraint_features_s', 
                                                               'constraint_features_t',
                                                               'variable_features_s',
                                                               'variable_features_t'])

valid_loader = torch_geometric.loader.DataLoader(valid_data, 
                                                 batch_size=batch_valid, 
                                                 shuffle=False, 
                                                 follow_batch=['constraint_features_s',
                                                               'constraint_features_t',
                                                               'variable_features_s',
                                                               'variable_features_t'])

policy = GNNPolicy().to(device)
optimizer = optimizer_fn(policy.parameters(), lr=lr) #ADAM is the best

print("-------------------------")
print(f"GNN for problem {problem}")
print(f"Training on:          {len(train_data)} samples")
print(f"Validating on:        {len(valid_data)} samples")
print(f"Batch Size Train:     {batch_train}")
print(f"Batch Size Valid      {batch_valid}")
print(f"Learning rate:        {lr} ")
print(f"Number of epochs:     {n_epoch}")
print(f"Normalize:            {normalize}")
print(f"Device:               {device}")
print(f"Loss fct:             {loss_fn}")
print(f"Optimizer:            {optimizer_fn}")  
print(f"Model's Size:         {sum(p.numel() for p in policy.parameters())} parameters ")
print("-------------------------") 

ValueError: num_samples should be a positive integer value, but got num_samples=0

In [25]:
for epoch in range(n_epoch):
    print(f"Epoch {epoch + 1}")

    train_loss, train_acc = process(policy, 
                                    train_loader, 
                                    loss_fn,
                                    device,
                                    optimizer=optimizer, 
                                    normalize=normalize)
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    print(f"Train loss: {train_loss:0.3f}, accuracy {train_acc:0.3f}" )

    valid_loss, valid_acc = process(policy, 
                                    valid_loader, 
                                    loss_fn, 
                                    device,
                                    optimizer=None,
                                    normalize=normalize)
    valid_losses.append(valid_loss)
    valid_accs.append(valid_acc)

    print(f"Valid loss: {valid_loss:0.3f}, accuracy {valid_acc:0.3f}" )
torch.save(policy.state_dict(),f'policy_{problem}.pkl')

Epoch 1
Train loss: 0.072, accuracy 0.953
Valid loss: 0.061, accuracy 0.957
Epoch 2
Train loss: 0.068, accuracy 0.951
Valid loss: 0.061, accuracy 0.957
Epoch 3


KeyboardInterrupt: 

In [26]:
torch.save(policy.state_dict(),f'policy_{problem}.pkl')

In [None]:
decisions = [ policy(dvalid.to(device)).item() for dvalid in valid_data ]

import matplotlib.pyplot as plt
plt.figure(0)
plt.hist(decisions)
plt.title('decisions histogramme for valid set')
plt.savefig("./hist.png")

plt.figure(1)
plt.plot(train_losses, label='train')
plt.plot(valid_losses, label='valid')
plt.title('losses')
plt.xlabel('epoch')
plt.legend()
plt.savefig("./losses.png")


plt.figure(2)
plt.plot(train_accs, label='train')
plt.plot(valid_accs, label='valid')
plt.title('accuracies')
plt.xlabel('epoch')
plt.legend()
plt.savefig("./accuracies.png")

