In [1]:
# Reload automatically when the file is changed.
%load_ext autoreload
%autoreload 2

# Importing Modules

In [2]:
import os 
import sys
MODULES_PATH = "../../Modules/PAGTN"
MODELS_PATH = "../../Models"

sys.path.append(MODULES_PATH)
sys.path.append(MODELS_PATH)


from Preprocessing import *
from Dataloaders_Preprocessing import *
from PAGTN import *
from Train import *





# Importing Data

In [3]:
List_Data_Train = Load_Data('../../Data/atoms/train', '../../Data/energies/train.csv')
List_Data_Test= Load_Test_Data('../../Data/atoms/test')

In [4]:
Display_Molecule_From_Atom_List(List_Data_Train[0]['Atoms_List'], Width=800, Height=800, Background_Color='lightblue')


In [5]:
def collate_fn(batch):
    ids, mol_graphs, energies = zip(*batch)
    return list(ids), list(mol_graphs), torch.stack(energies) if energies[0] is not None else None

In [6]:

Dataset_Train = PAGTN_Dataset(Data_List = List_Data_Train,
                              max_distance=5, return_energies=True, device='cpu', p_embed=True, ring_embed=True, self_attn=False, no_truncate=False)

Dataset_Test = PAGTN_Dataset(Data_List = List_Data_Test,
                                            max_distance=5, return_energies=False, device='cpu', p_embed=True, ring_embed=True, self_attn=False, no_truncate=False)




In [7]:
# Configuration des paramètres pour les classes MolTransformer et PropPredictor
HIDDEN_SIZE = 1024
NB_HEADS = 30
D_K = 32
DEPTH = 1
DROPOUT = 0.1
MAX_DISTANCE = 23
P_EMBED = True
RING_EMBED = True
SELF_ATTN = False
NO_SHARE = False
MASK_NEIGH = True
AGG_FUNC = 'mean'
N_CLASSES = 1
BATCH_SIZE = 64
EPOCHS = 100
LEARNING_RATE = 1e-3

# Affichage des paramètres pour vérification
print("Configuration des paramètres du PAGTN :")
print(f"HIDDEN_SIZE: {HIDDEN_SIZE}")
print(f"NB_HEADS: {NB_HEADS}")
print(f"D_K: {D_K}")
print(f"DEPTH: {DEPTH}")
print(f"DROPOUT: {DROPOUT}")
print(f"MAX_DISTANCE: {MAX_DISTANCE}")
print(f"P_EMBED: {P_EMBED}")
print(f"RING_EMBED: {RING_EMBED}")
print(f"SELF_ATTN: {SELF_ATTN}")
print(f"NO_SHARE: {NO_SHARE}")
print(f"MASK_NEIGH: {MASK_NEIGH}")
print(f"AGG_FUNC: {AGG_FUNC}")
print(f"N_CLASSES: {N_CLASSES}")
print(f"BATCH_SIZE: {BATCH_SIZE}")
print(f"EPOCHS: {EPOCHS}")
print(f"LEARNING_RATE: {LEARNING_RATE}")

# Instanciation du modèle
import torch
Device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {Device}")

Model = PropPredictor(
    hidden_size=HIDDEN_SIZE,
    n_heads=NB_HEADS,
    d_k=D_K,
    depth=DEPTH,
    dropout=DROPOUT,
    max_distance=MAX_DISTANCE,
    p_embed=P_EMBED,
    ring_embed=RING_EMBED,
    self_attn=SELF_ATTN,
    no_share=NO_SHARE,
    mask_neigh=MASK_NEIGH,
    agg_func=AGG_FUNC,
    n_classes=N_CLASSES
)

Model.to(Device)
print("Model PAGTN instancié avec succès.")

Configuration des paramètres du PAGTN :
HIDDEN_SIZE: 1024
NB_HEADS: 30
D_K: 32
DEPTH: 1
DROPOUT: 0.1
MAX_DISTANCE: 23
P_EMBED: True
RING_EMBED: True
SELF_ATTN: False
NO_SHARE: False
MASK_NEIGH: True
AGG_FUNC: mean
N_CLASSES: 1
BATCH_SIZE: 64
EPOCHS: 100
LEARNING_RATE: 0.001
Using device: cuda
Model PAGTN instancié avec succès.


In [8]:
PATH_SAVING_MODEL = '../../Saved_Models/PAGTN/Best_Model.pth'

os.makedirs(os.path.dirname(PATH_SAVING_MODEL), exist_ok=True)




In [9]:
# NB parameters
Nb_Parameters = sum(p.numel() for p in Model.parameters() if p.requires_grad)
print(f"Number of parameters in the model: {Nb_Parameters}")

Number of parameters in the model: 2231650


In [10]:
DataLoader_Train = DataLoader(Dataset_Train, batch_size=64, shuffle=True, collate_fn=collate_fn)
DataLoader_Test = DataLoader(Dataset_Test, batch_size=64, shuffle=False, collate_fn=collate_fn)

In [11]:
Device

device(type='cuda')

In [None]:
import torch
import torch.nn as nn
import torch.optim.lr_scheduler as lr_scheduler

# Nettoyage du cache GPU
torch.cuda.empty_cache()

# Optimizer & Device
Optimizer = torch.optim.Adam(Model.parameters(), lr=1e-3)
Device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Criterion = nn.MSELoss()

# ✅ Exemple 1 : StepLR (décroît le LR tous les 5 epochs)
# Scheduler = lr_scheduler.StepLR(Optimizer, step_size=5, gamma=0.5)

# ✅ Exemple 2 : ReduceLROnPlateau (réduit le LR si la perte de test ne diminue pas)
Scheduler = lr_scheduler.ReduceLROnPlateau(
    Optimizer,
    mode='min',
    factor=0.5,
    patience=2,   # n epochs sans amélioration avant réduction du LR
    verbose=True
)

# 🚀 Appel à Train avec le Scheduler
List_Train_Losses_Per_Epochs, List_Test_Losses_Per_Epochs, \
List_Train_Losses_Per_Batches, List_Test_Losses_Per_Batches = Train(
    Model,
    DataLoader_Train,
    DataLoader_Train,  # Remplace par DataLoader_Test si tu as un vrai jeu de test
    Optimizer,
    Criterion,
    Num_Epochs=10,
    Device=Device,
    Save_Path=PATH_SAVING_MODEL,
    Scheduler=Scheduler  # ✅ Ici on le passe
)

# 🚀 Affichage + sauvegarde du graphique
Plot_Losses(
    List_Train_Losses_Per_Epochs,
    List_Test_Losses_Per_Epochs,
    List_Train_Losses_Per_Batches,
    List_Test_Losses_Per_Batches,
    Save=True,
    Save_Path='losses_plot.png'
)




Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Batches:   0%|          | 0/103 [00:00<?, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Batches:   0%|          | 0/103 [00:00<?, ?it/s]

New best test loss: 139.465, model saved to ../../Saved_Models/PAGTN/Best_Model.pth
Epoch: 1/10 Train Loss: 706.377 Test Loss: 139.465


Batches:   0%|          | 0/103 [00:00<?, ?it/s]