In [None]:
import sys

# --- Notebook-wide settings -------------------------------------------------
# Directory holding the project sources on the mounted Google Drive.
path_to_src = '/content/drive/MyDrive/Colab Notebooks/GNN/MoTSE-main/src'
dataset_name = "QM9"
# Names of the individual prediction tasks trained one at a time below.
task_lst = [
    "mu", "alpha", "homo", "lumo", "gap", "r2",
    "zpve", "u0", "u298", "h298", "g298", "cv",
]

# --- Google Drive (Colab only) ----------------------------------------------
# Mount Drive so that `path_to_src` resolves inside the runtime.
from google.colab import drive
drive.mount('/content/drive')

# Put the project sources first on the import path so that the later
# `utils` / `trainer` imports are resolved from `path_to_src`.
sys.path.insert(0, path_to_src)

Mounted at /content/drive


In [None]:
# Print the runtime's CUDA_VERSION environment variable.
# NOTE(review): presumably used to choose the matching "cuXYZ" DGL wheel index
# in the install cell -- confirm the two agree when the runtime changes.
!echo $CUDA_VERSION

11.8.0


In [None]:
#!pip3 uninstall dgl -y
!pip3 install  dgl -f https://data.dgl.ai/wheels/cu118/repo.html
!pip3 install dgllife
!pip3 install rdkit

Looking in links: https://data.dgl.ai/wheels/cu118/repo.html
Collecting dgl
  Downloading https://data.dgl.ai/wheels/cu118/dgl-1.1.3%2Bcu118-cp310-cp310-manylinux1_x86_64.whl (93.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.8/93.8 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dgl
Successfully installed dgl-1.1.3+cu118
Collecting dgllife
  Downloading dgllife-0.3.2-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.1/226.1 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dgllife
Successfully installed dgllife-0.3.2
Collecting rdkit
  Downloading rdkit-2023.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.5/30.5 MB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: rdkit
Successfully installed rdkit-2023.9.2


In [None]:
import torch
import pandas as pd
from utils import load_model, makedir, set_random_seed
from utils.data import load_data
from trainer import Trainer

%load_ext autoreload
%autoreload 2
set_random_seed(22)

DGL backend not selected or invalid.  Assuming PyTorch for now.


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


# Pretraining Source Tasks

## Configuration

In [None]:
# Settings for the source-task run: data and checkpoints live under the
# "10000" sub-directories of datasets/ and saved_models/.
# NOTE(review): model_type is not referenced by any cell visible in this
# notebook; the "GCN" segment of model_path is hard-coded separately.
model_type = 'GCN'
tasks = [
    "mu", "alpha", "homo", "lumo", "gap", "r2",
    "zpve", "u0", "u298", "h298", "g298", "cv",
]

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Both paths keep their trailing slash: later cells append file names to them.
data_path = f'{path_to_src}/../datasets/{dataset_name}/10000/'
model_path = f"{path_to_src}/../saved_models/{dataset_name}/GCN/10000/"
makedir(model_path)

# Column-oriented accumulator: a 'task' column plus one column per metric.
results_dict = dict(task=[])

## Training

In [None]:
# Train one single-task model per entry in `tasks` and record the metrics that
# trainer.fit returns for it (presumably test-set scores -- confirm in Trainer).
#
# Start from a fresh accumulator: without this, re-running the cell (e.g. after
# an interrupted run) appends to the rows left over from the previous attempt
# and the saved results table ends up with duplicated tasks.
results_dict = {'task': []}

for task in tasks:
    print(task)

    # Data loaders and dataset metadata restricted to the current task.
    train_loader, val_loader, test_loader, data_args = load_data(
        dataset=dataset_name,
        data_path=data_path,
        tasks=[task],
        device=device,
    )

    model = load_model(n_tasks=1, device=device)
    trainer = Trainer(
        device=device,
        tasks=[task],
        data_args=data_args,
        model_path=model_path,
    )
    model, task_results_dict = trainer.fit(
        model, train_loader, val_loader, test_loader
    )

    results_dict['task'].append(task)
    # task_results_dict is indexed as [metric][task]; each metric becomes a
    # column that is created on first use.
    for metric in data_args['metrics']:
        results_dict.setdefault(metric, []).append(task_results_dict[metric][task])

mu
8000 loaded!
1000 loaded!
1000 loaded!
[0] training loss:0.812421104669571
val r2:0.2645375889423013
val mae:0.987579345703125
[20] training loss:0.6339807767271995
val r2:0.37945469314639846
val mae:0.9008296728134155
[40] training loss:0.6124025025963783
val r2:0.40340544653237
val mae:0.8725454807281494
[60] training loss:0.6031891531348228
val r2:0.42277022516901863
val mae:0.8656215667724609
[80] training loss:0.5873089435100556
val r2:0.4320407355032707
val mae:0.84896320104599
[100] training loss:0.5806761576533318
val r2:0.4510780743506211
val mae:0.8310855031013489
[120] training loss:0.5697480791211128
val r2:0.4531837100813272
val mae:0.8303969502449036
[140] training loss:0.5559694228172303
val r2:0.45399958936612717
val mae:0.833116888999939
[160] training loss:0.5483494055271149
val r2:0.4283843136761265
val mae:0.8767456412315369
[180] training loss:0.5455195774435997
val r2:0.4498248120634367
val mae:0.8521329164505005
[200] training loss:0.5324652498960495
val r2:0.

In [None]:
# Mirror the checkpoint directory layout under "results" instead of
# "saved_models" and write the collected metrics there as a CSV file.
result_path = model_path.replace('saved_models','results')
makedir(result_path)

results_file = result_path+'results.csv'
results_table = pd.DataFrame(results_dict)
# Floats are written with three decimals; the row index is omitted.
results_table.to_csv(results_file, float_format='%.3f', index=False)
print(f"Results have been saved to {results_file}")

Results have been saved to ../results/QM9/GCN/10000/results.csv


# Training Target Tasks

## Configuration

In [None]:
# Settings for the target-task run: data and checkpoints live under the
# "1000" sub-directories of datasets/ and saved_models/.
# NOTE(review): model_type is not referenced by any cell visible in this
# notebook; the "GCN" segment of model_path is hard-coded separately.
model_type = 'GCN'
tasks = task_lst

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Both paths keep their trailing slash: later cells append file names to them.
data_path = f'{path_to_src}/../datasets/{dataset_name}/1000/'
model_path = f"{path_to_src}/../saved_models/{dataset_name}/GCN/1000/"
makedir(model_path)

# Column-oriented accumulator: a 'task' column plus one column per metric.
results_dict = dict(task=[])

## Training

In [None]:
# Train one single-task model per entry in `tasks` and record the metrics that
# trainer.fit returns for it (presumably test-set scores -- confirm in Trainer).
#
# Start from a fresh accumulator: without this, re-running the cell (e.g. after
# an interrupted run) appends to the rows left over from the previous attempt
# and the saved results table ends up with duplicated tasks.
results_dict = {'task': []}

for task in tasks:
    print(task)

    # Data loaders and dataset metadata restricted to the current task.
    train_loader, val_loader, test_loader, data_args = load_data(
        dataset=dataset_name,
        data_path=data_path,
        tasks=[task],
        device=device,
    )

    model = load_model(n_tasks=1, device=device)
    trainer = Trainer(
        device=device,
        tasks=[task],
        data_args=data_args,
        model_path=model_path,
    )
    model, task_results_dict = trainer.fit(
        model, train_loader, val_loader, test_loader
    )

    results_dict['task'].append(task)
    # task_results_dict is indexed as [metric][task]; each metric becomes a
    # column that is created on first use.
    for metric in data_args['metrics']:
        results_dict.setdefault(metric, []).append(task_results_dict[metric][task])

mu
preprocessing data ...
800 loaded!
preprocessing data ...
100 loaded!
preprocessing data ...
1000 loaded!
[0] training loss:0.9425276380777359
val r2:0.12966066126128672
val mae:1.1412886381149292
[20] training loss:0.7278076928853988
val r2:0.3142827485076205
val mae:0.9740509986877441
[40] training loss:0.682363720536232
val r2:0.32774297279999565
val mae:0.9582579135894775
test r2:0.3470229648742369
test mae:0.8737985491752625
alpha
800 loaded!
100 loaded!
1000 loaded!
[0] training loss:0.8359795388579369
val r2:0.2654919767856867
val mae:5.543822765350342
[20] training loss:0.33755888521671296
val r2:0.6406691611564099
val mae:3.8321316242218018
[40] training loss:0.3116554103791714
val r2:0.6307392091471757
val mae:3.9431817531585693
[60] training loss:0.2863827235996723
val r2:0.692662076557718
val mae:3.615518808364868
[80] training loss:0.2570268312096596
val r2:0.7261025893244109
val mae:3.4489409923553467
[100] training loss:0.24893431454896928
val r2:0.7597863490499838
va

In [None]:
# Mirror the checkpoint directory layout under "results" instead of
# "saved_models" and write the collected metrics there as a CSV file.
result_path = model_path.replace('saved_models','results')
makedir(result_path)

results_file = result_path+'results.csv'
results_table = pd.DataFrame(results_dict)
# Floats are written with three decimals; the row index is omitted.
results_table.to_csv(results_file, float_format='%.3f', index=False)
print(f"Results have been saved to {results_file}")

Results have been saved to /content/drive/MyDrive/Colab Notebooks/GNN/MoTSE-main/src/../results/QM9/GCN/1000/results.csv
