In [20]:
import torch
from tqdm import tqdm
from ogb.graphproppred import Evaluator
import torch.optim as optim
import wandb
from ogb.graphproppred import PygGraphPropPredDataset
from torch_geometric.loader import DataLoader
import sys
BASE_PATH = globals()['_dh'][0].parent.absolute()
sys.path.insert(1, str(BASE_PATH))
from src.models.models import GCN
import torch.nn.functional as F

In [16]:
def train_inductive_epoch(model, loader, optimizer):
    model.train()
    criterion = torch.nn.BCEWithLogitsLoss()
    for step, batched_data in enumerate(tqdm(loader, desc="Iteration")):  # Iterate in batches over the training dataset.
        
        pred = model(batched_data.x, batched_data.edge_index, batched_data.batch)
        ## ignore nan targets (unlabeled) when computing training loss.
        is_labeled = batched_data.y == batched_data.y
        loss = criterion(pred.to(torch.float32)[is_labeled], batched_data.y.to(torch.float32)[is_labeled])
        optimizer.zero_grad()  
        loss.backward() 
        optimizer.step()

In [29]:
dataset = PygGraphPropPredDataset(name = "ogbg-molhiv") 
split_idx = dataset.get_idx_split() 
train_loader = DataLoader(dataset[split_idx["train"]], batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=32, shuffle=False)
test_loader = DataLoader(dataset[split_idx["test"]], batch_size=32, shuffle=False)
item = next(iter(train_loader))

Downloading http://snap.stanford.edu/ogb/data/graphproppred/csv_mol_download/hiv.zip


Downloaded 0.00 GB: 100%|██████████| 3/3 [00:05<00:00,  1.68s/it]
Processing...


Extracting dataset/hiv.zip
Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 41127/41127 [00:00<00:00, 62966.00it/s]


Converting graphs into PyG objects...


100%|██████████| 41127/41127 [00:01<00:00, 24169.78it/s]


Saving...


Done!


In [30]:
dataset.num_tasks

1

In [None]:
item

In [31]:
model = GCN_inductive(
            num_tasks=dataset.num_tasks,
            hidden_dim=32,
            num_layers=2,
            dropout=0.5)

In [4]:
evaluator = Evaluator(name="ogbg-molpcba")
evaluator.expected_input_format

"==== Expected input format of Evaluator for ogbg-molpcba\n{'y_true': y_true, 'y_pred': y_pred}\n- y_true: numpy ndarray or torch tensor of shape (num_graphs, num_tasks)\n- y_pred: numpy ndarray or torch tensor of shape (num_graphs, num_tasks)\nwhere y_pred stores score values (for computing AUC score),\nnum_task is 128, and each row corresponds to one graph.\nnan values in y_true are ignored during evaluation.\n"

In [6]:
pred = model(item.x, item.edge_index, item.batch)

In [9]:
# pred = F.log_softmax(pred, dim=1).argmax(dim=1)
criterion= torch.nn.BCEWithLogitsLoss()
is_labeled = item.y == item.y
item

DataBatch(edge_index=[2, 1742], edge_attr=[1742, 3], x=[805, 9], y=[32, 128], num_nodes=805, batch=[805], ptr=[33])

In [None]:
print(is_labeled.size())
print(pred.size())

In [None]:
item.y.size()

In [10]:
criterion(pred[is_labeled], item.y.to(torch.float32)[is_labeled])


tensor(0.7056, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [None]:
data_details = {
    "num_node_features": dataset.num_node_features,
    "num_edge_features": dataset.num_edge_features,
    "num_classes": dataset.num_classes
}

In [12]:
config = {
    'num_epochs': 200,
    'dataset_name': "Computers",
    'noise_percent': 0,
    'hid_dim': 32,
    'num_iter_layers': 6,
    'smooth_fac': 0.8,
    'dropout': 0.5,
    'learning_rate': 0.01,
    'weight_decay': 4e-4
} 

In [32]:
optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"], weight_decay=config["weight_decay"])
train_inductive_epoch(model, loader=train_loader, optimizer=optimizer)

Iteration: 100%|██████████| 1029/1029 [00:14<00:00, 73.15it/s]


In [20]:
def train_inductive_epoch(model, loader, optimizer):
    model.train()
    criterion = torch.nn.BCEWithLogitsLoss()
    for step, batched_data in enumerate(tqdm(loader, desc="Iteration")):  # Iterate in batches over the training dataset.
        
        pred = model(batched_data.x, batched_data.edge_index, batched_data.batch)
        ## ignore nan targets (unlabeled) when computing training loss.
        is_labeled = batched_data.y == batched_data.y
        loss = criterion(pred.to(torch.float32)[is_labeled], batched_data.y.to(torch.float32)[is_labeled])
        optimizer.zero_grad()  
        loss.backward() 
        optimizer.step()
        
def eval_inductive(model, loader, evaluator):
    model.eval()
    y_true = []
    y_pred = []
    for step, batched_data in enumerate(tqdm(loader, desc="Iteration")):
        with torch.no_grad():
            pred = model(batched_data.x, batched_data.edge_index, batched_data.batch)
            y_true.append(batched_data.y.view(pred.shape).detach())
            y_pred.append(pred.detach())
    y_true = torch.cat(y_true, dim = 0).numpy()
    y_pred = torch.cat(y_pred, dim = 0).numpy()
    input_dict = {"y_true": y_true, "y_pred": y_pred}
    return evaluator.eval(input_dict)

In [34]:
e = eval_inductive(model, valid_loader, evaluator)

Iteration: 100%|██████████| 129/129 [00:01<00:00, 101.45it/s]


RuntimeError: Number of tasks for ogbg-molpcba should be 128 but 1 given

In [35]:
e

{'ap': 0.04068080172612249}

In [None]:
def train_inductive(model, train_loader, valid_loader, config):
    wandb.watch(model, log="all", log_freq=10)
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    for epoch in range(config.num_epochs):
        train_inductive_epoch(model, train_loader, optimizer)
        ap = eval_inductive(model, valid_loader, evaluator)
        wandb.log({
            "Validate ap": ap
        })


In [2]:
from ogb.graphproppred import PygGraphPropPredDataset
dataset = PygGraphPropPredDataset(name='ogbg-molhiv') 

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
type(dataset)

ogb.graphproppred.dataset_pyg.PygGraphPropPredDataset

In [5]:
split_idx = dataset.get_idx_split() 

In [13]:
split_idx["train"].size()[0]

32901

In [18]:
import os
import wandb
wandb.login()
with wandb.init(project="IterativeMethods", job_type="loadModelTry", config=None) as run:
    model_artifact = run.use_artifact("trainedGCNtry:v0")
    model_dir = model_artifact.download()
    model_path = os.path.join(model_dir, "initialized_model.pth")
    model_config = model_artifact.metadata


[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [19]:
model_config

{'dropout': 0.5,
 'hid_dim': 32,
 'num_epochs': 200,
 'smooth_fac': 0.7,
 'dataset_name': 'Cora',
 'weight_decay': 0.0004,
 'learning_rate': 0.01,
 'noise_percent': 0,
 'num_iter_layers': 9}

In [25]:
cf = {
    "input_dim": 1433,
    "output_dim": 7,
    "hidden_dim": 32,
    "num_layers": 9
}

In [26]:
model = GCN(**cf)

In [27]:
print(model)

GCN(
  (first_gc): GCNConv(1433, 32)
  (gcs): ModuleList(
    (0-6): 7 x GCNConv(32, 32)
  )
  (final_gc): GCNConv(32, 7)
)


In [28]:
model.load_state_dict(torch.load(model_path))

<All keys matched successfully>

In [29]:
"aa" + " bb "

'aa bb '

In [1]:
scheduler = None

In [3]:
scheduler.step()

AttributeError: 'NoneType' object has no attribute 'step'