In [7]:
import pandas as pd

Load preprocessed test data

In [9]:
# Read the preprocessed test table; the path is relative to the notebook's
# working directory.
x = pd.read_csv("../Downloads/Telegram Desktop/clean_test_with_FP.csv")

Convert the test data to the MolCLR input format (columns `smiles`, `active`) and save it to the path the dataset loader reads

In [10]:
# Keep only the cleaned SMILES column, add a constant placeholder label
# (the test set has no labels), and use the column names
# "smiles" / "active" for the CSV written below.
x = (
    x[["clean"]]
    .assign(active=0)
    .rename(columns={"clean": "smiles"})
)

# Write the table without the pandas index column.
x.to_csv('data/covid/COVID-test.csv', index=False)


In [11]:
from dataset.dataset_mix_both import MoleculeDataset

Model config

In [12]:
# Inference configuration: the batch size plus the dataset and model settings.
config = dict(
    batch_size=32,
    dataset=dict(
        num_workers=4,         # dataloader number of workers
        valid_size=0.1,        # ratio of validation data
        test_size=0.1,         # ratio of test data
        splitting="scaffold",  # data splitting (i.e., random/scaffold)
    ),
    model=dict(
        num_layer=5,           # number of graph conv layers
        emb_dim=300,           # embedding dimension in graph conv layers
        feat_dim=512,          # output feature dimension
        drop_ratio=0.3,        # dropout ratio
        pool="mean",
    ),
)

In [13]:
# Add the task type and the CSV location to the dataset section of the config.
config['dataset'].update(
    task='classification',
    data_path='data/covid/COVID-test.csv',
)

Load test dataset

In [17]:
# Build the test dataset from the values already stored in the config so the
# CSV path and task type are defined in exactly one place.
dataset = MoleculeDataset(
    data_path=config['dataset']['data_path'],
    target="active",
    task=config['dataset']['task'],
)

1614


Load model from config

In [20]:
from models.ginet_fp_finetune_contrast import GINet

# Instantiate the network: the task name is passed positionally and every
# entry of the "model" config section is forwarded as a keyword argument.
model = GINet(
    config["dataset"]["task"],
    **config["model"],
)

Load model weights

In [None]:
import torch

# Load the fine-tuned checkpoint onto the CPU. The prediction loop converts
# model outputs directly to NumPy, so inference runs on the CPU; mapping the
# stored tensors to "cpu" also lets a checkpoint saved on a GPU load on a
# machine without CUDA.
sd = torch.load(
    "finetune/Feb23_19-17-46_COVID_active/checkpoints/model.pth",
    map_location="cpu",
)

# Copy the checkpoint weights into the model.
model.load_state_dict(sd)

Convert dataset to dataloader

In [21]:
from torch_geometric.data import DataLoader

# shuffle=False keeps batches in dataset order, so the collected predictions
# line up with the rows of the test CSV.
# The batch size is taken from the shared config instead of repeating the
# literal 32.
dl = DataLoader(dataset, shuffle=False, batch_size=config["batch_size"])

Make predictions (the model outputs logits, so we pass them through a sigmoid to obtain scores in [0, 1])

In [60]:
import torch
from tqdm import tqdm

# Put the model in inference mode: without this, dropout (drop_ratio=0.3 in
# the config) stays active and predictions vary between runs.
model.eval()

res = []
# No gradients are needed for prediction; skipping them saves memory and
# makes the outputs directly convertible to NumPy.
with torch.no_grad():
    # tqdm reads the number of batches from len(dl), so the progress total
    # stays correct if the dataset size or the batch size changes.
    for bs, _, _ in tqdm(dl):
        # The second element of the model output holds the per-class logits;
        # take column 1 and squash it with a sigmoid.
        res.extend(model(bs)[1][:, 1].sigmoid().numpy())

100%|███████████████████████████████████████████████████████████████████████████████████| 51/51 [00:21<00:00,  2.32it/s]


In [61]:
import numpy as np

# Turn the collected per-molecule scores into one array and keep a copy on
# disk so the (slow) prediction loop does not have to be repeated.
res = np.asarray(res)
np.save("sub.npy", res)

Reorder the predictions to the original test order (predictions were made on the cleaned dataset, whose row order differs from the original test file)

In [None]:
# Re-read the cleaned test table to get its two saved index columns.
# NOTE(review): this path climbs three directories ("../../../"), while the
# load cell at the top of the notebook reads the same file via
# "../Downloads/..." -- confirm which one matches the working directory.
clean_table = pd.read_csv("../../../Downloads/Telegram Desktop/clean_test_with_FP.csv")

# Sort rows by "Unnamed: 0.1" and use the matching "Unnamed: 0" values as
# positions into the prediction array.
# presumably "Unnamed: 0.1" is the original test row order and "Unnamed: 0"
# the row position in the cleaned file -- verify against the preprocessing.
reorder_idx = clean_table.sort_values("Unnamed: 0.1")["Unnamed: 0"].values
res = res[reorder_idx]

Load predictions from random forest

In [None]:
# Load the saved random-forest predictions.
# assumes a 2-D array with one row per test molecule, since column 1 is read
# in the blending step -- TODO confirm the row order matches the test file.
rf_sub = np.load("../YouGraph/ogb/molhiv/rf_preds/rf_final_pred.npy")

Blend the two models' predictions (equal-weight average)

In [None]:
# Equal-weight blend: half of the random-forest column-1 score plus half of
# the GNN score for every molecule.
scores = 0.5 * rf_sub[:, 1] + 0.5 * res

Make submission file

In [None]:
# Load the original test table that the submission must mirror row for row.
test = pd.read_csv("../../../global-ai-challenge-molecules/Task/test.csv")

# Binarize the blended scores with the 0.348 decision threshold.
# Fix: the blended array is named `scores`; the previous `score` was an
# undefined name and raised NameError.
test["Active"] = (scores > 0.348).astype(int)

# Write the submission without the pandas index column.
test.to_csv("../Downloads/new-sub-tuned.csv", index=False)