<a href="https://colab.research.google.com/github/Clearbloo/Feynman_GNN/blob/main/Feynman_GNN_v5.0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**Install and import libraries**


In [1]:
## Standard libraries
import os
import os.path as osp
import json
import math
import numpy as np 
import time
import pandas as pd
import ast
import random as rnd
from typing import Optional
from functools import partial
from tqdm import tqdm
from sklearn.metrics import confusion_matrix, f1_score, \
    accuracy_score, precision_score, recall_score, roc_auc_score
from google.colab import drive
drive.mount("/content/gdrive", force_remount=True)

## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline 
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf') # For export
from matplotlib.colors import to_rgb
import matplotlib
matplotlib.rcParams['lines.linewidth'] = 2.0
import seaborn as sns
sns.reset_orig()
sns.set()

# Load the TensorBoard notebook extension
%load_ext tensorboard

## Progress bar
from tqdm.notebook import tqdm

## PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim
from torch.nn import MSELoss
# Torchvision
import torchvision
from torchvision import transforms
# PyTorch Lightning
try:
    import pytorch_lightning as pl
except ModuleNotFoundError: # Google Colab does not have PyTorch Lightning installed by default. Hence, we do it here if necessary
    !pip install pytorch-lightning>=1.4
    import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger

## Ray
try:
    import ray
except ModuleNotFoundError: # Google Colab does not have Ray installed by default.
    !pip install ray
    import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler, PopulationBasedTraining
from ray.tune.integration.pytorch_lightning import TuneReportCallback, TuneReportCheckpointCallback

## Tensorboard
try:
  import tensorboardX
except ModuleNotFoundError:
  !pip install tensorboardX
  import tensorboardX

# Path to the folder where the datasets are/should be downloaded (e.g. CIFAR10)
DATASET_PATH = "/content/gdrive/MyDrive/Part_III_Project/data/"
# Path to the folder where the pretrained models are saved
CHECKPOINT_PATH = "/content/gdrive/MyDrive/Part_III_Project/saved_models/"

# Setting the seed
pl.seed_everything()

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.determinstic = True
torch.backends.cudnn.benchmark = False

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# torch geometric
try: 
    import torch_geometric
except ModuleNotFoundError:
    # Installing torch geometric packages with specific CUDA+PyTorch version. 
    # See https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html for details 
    TORCH = torch.__version__.split('+')[0]
    CUDA = 'cu' + torch.version.cuda.replace('.','')

    !pip install --quiet torch-scatter     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
    !pip install --quiet torch-sparse      -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
    !pip install --quiet torch-cluster     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
    !pip install --quiet torch-spline-conv -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
    !pip install --quiet torch-geometric 
    import torch_geometric
import torch_geometric.nn as geom_nn
import torch_geometric.data as geom_data
from torch_geometric.data import Dataset, Data, InMemoryDataset
from torch_geometric.loader import DataLoader
from torch.nn import Linear, BatchNorm1d, ModuleList
from torch_geometric.nn import TopKPooling 
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp


Mounted at /content/gdrive
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.8.0 requires tf-estimator-nightly==2.8.0.dev2021122109, which is not installed.
datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.[0m
Collecting ray
  Downloading ray-1.11.0-cp37-cp37m-manylinux2014_x86_64.whl (52.7 MB)
[K     |████████████████████████████████| 52.7 MB 139 kB/s 
Collecting redis>=3.5.0
  Downloading redis-4.1.4-py3-none-any.whl (175 kB)
[K     |████████████████████████████████| 175 kB 57.7 MB/s 
Collecting grpcio<=1.43.0,>=1.28.1
  Downloading grpcio-1.43.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.1 MB)
[K     |████████████████████████████████| 4.1 MB 44.5 MB/s 
Collecting deprecated>=1.2.3
  Downloading Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: depreca

  rank_zero_warn(f"No seed found, seed set to {seed}")
Global seed set to 2434805482


cpu
[K     |████████████████████████████████| 7.9 MB 2.5 MB/s 
[K     |████████████████████████████████| 3.5 MB 2.6 MB/s 
[K     |████████████████████████████████| 2.5 MB 2.5 MB/s 
[K     |████████████████████████████████| 750 kB 2.1 MB/s 
[K     |████████████████████████████████| 407 kB 4.1 MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


#**My own dataset class**

In [2]:
class FeynmanDataset(Dataset):
    def __init__(self, dataset_size, filename, reprocess: bool = False, root=DATASET_PATH, test: bool = False, train: bool = False, val: bool = False, pred: bool = False, transform=None, pre_transform=None, pre_filter=None):
      """
      root = directory where dataset should be stored. Contains raw data in raw_dir and processed data in processed_dir
      test, train, val = bools, what type of dataset you want. default all false
      """
      self.filename = filename
      self.trunc_size= 10000
      self.test = test 
      self.train = train
      self.val = val
      self.pred = pred
      self.reproc = reprocess
      self.label="full"
      if self.train == True:
        self.label="train"
      if self.val == True:
        self.label="val"
      if self.test == True:
        self.label="test"
      if self.pred == True:
        self.label="pred"

      if self.reproc == True:
        overwrite_conf = input("This may overwrite old dataset files. Are you happy for this to happen? Type 'yes' to confirm. \n")
        if overwrite_conf == 'yes':
          pass
      else:
          self.reproc = False
      
      self.dataset_size = dataset_size
      super().__init__(root, transform, pre_transform, pre_filter)

    @property
    def raw_file_names(self):
      #skips download if this is found
      return self.filename

    @property
    def processed_file_names(self):
      #will skip the process method if the following files are found
      if self.reproc == False:
        proc_files = [0]*self.dataset_size
        for idx in range(self.dataset_size):
          proc_files[idx] =  f'{self.label}_data_{idx}.pt'
          #if idx % 10000 == 0:
            #print(idx)
        #print(proc_files[-1])
        return proc_files
      #need to change this true case to delete files instead
      if self.reproc == True:
        return "empty"
      

    def download(self):
        # Download to `self.raw_dir`. In the future I will make this call a
        # python file to build the dataset as a csv
        print("No files to download")
        pass

    def process(self):
      #time.sleep(60)
      print("reading data")
      self.full_data = pd.read_csv(self.raw_paths[0])
      print("sampling data")
      self.data = self.full_data[0:self.dataset_size]
      #self.data = self.full_data.sample(n=self.dataset_size)

      #create a list of all y values
      print("creating list of y values")
      all_y_values=self.data['y'].tolist()
      y_max = max(all_y_values)
      y_min = min(all_y_values)
      

      #truncate dataframe into smaller dataframes
      split_size = math.ceil(self.dataset_size/self.trunc_size)
      self.trunc_data_list = np.array_split(self.data, split_size)
      #cycle through graphs and create data objects for each
      idx=0
      for k in tqdm(range(len(self.trunc_data_list))):
        self.trunc_data=self.trunc_data_list[k]
        for row, feyndiag in tqdm(self.trunc_data.iterrows(),total=self.trunc_data.shape[0]):
          #node features
          #start = time.time()
          x = self._get_node_features(feyndiag)
          #end=time.time()
          #print("time to get node features: ", end-start)
          #edge features
          #start = time.time()
          edge_attr = self._get_edge_features(feyndiag)
          #end=time.time()
          #print("time to get edge features: ", end-start)
          #adjacency list
          #start = time.time()
          edge_index = self._get_adj_list(feyndiag)
          #end = time.time()
          #print("time to get adj list: ", end-start)
          #targets
          y = self._get_targets(feyndiag)
          #normalized targets to the interval [0,1]
          y_norm = (y-y_min)/(y_max-y_min)
          
          #create data object
          #print("creating data object")
          data = Data(x=x, edge_index = edge_index, edge_attr=edge_attr, y=y, y_norm=y_norm)
          #save file
          start=time.time()
          torch.save(data, osp.join(self.processed_dir, f'{self.label}_data_{idx}.pt'))
          end=time.time()

          if idx % 10000 == 0:
            print("save time is: ", end-start)
          idx+=1      

    def _get_node_features(self, diagram):
      """
      This will return a list of the node feature vectors (which are 1D)
      [Number of Nodes, 1]
      """
      x = ast.literal_eval(diagram.loc['x'])
      x = torch.tensor(x,dtype=torch.float).view(-1,1)
      return x

    def _get_edge_features(self, diagram):
      """
      This will return a list of the edge feature vectors (which are 11D)
      [Number of Edges, 11]
      """
      attr = ast.literal_eval(diagram.loc['edge_attr'])
      return torch.tensor(attr,dtype=torch.float).view(-1,11)
      
    def _get_adj_list(self, diagram):
      """
      This will return a list of the adjacency vectors (which are 2D)
      [2, Number of Edges]
      """
      adj_list = ast.literal_eval(diagram.loc['edge_index'])
      return torch.tensor(adj_list,dtype=torch.long).view(2,-1)

    def _get_targets(self, diagram):
      """
      This will return a list of the target vectors (which are 1D)
      [Number of targets, 1]
      """
      y = diagram.loc['y']
      return torch.tensor(y,dtype=torch.float)

    def len(self):
      return len(self.processed_file_names)

    def get(self, idx):
      data = torch.load(osp.join(self.processed_dir, f'{self.label}_data_{idx}.pt'))
      return data

#**Load the dataset**

In [8]:
#drive.mount("/content/gdrive", force_remount=True)
filename = 'QED_data.csv'
dataset_size=10000
#torch.save(0, osp.join(DATASET_PATH, f'processed/force_skip.pt'))
full_dataset = FeynmanDataset(dataset_size=dataset_size, filename=filename)

In [4]:
"""
print("Loading datasets...")
train_dataset = FeynmanDataset(1000000, reprocess=False, filename=filename, train=True)
test_dataset = FeynmanDataset(100, reprocess=False, filename=filename, test=True)
val_dataset = FeynmanDataset(50, reprocess=False, filename=filename, val=True)
pred_dataset = FeynmanDataset(10, reprocess=False, filename=filename, pred=True)
print("Finished all!")
"""

'\nprint("Loading datasets...")\ntrain_dataset = FeynmanDataset(1000000, reprocess=False, filename=filename, train=True)\ntest_dataset = FeynmanDataset(100, reprocess=False, filename=filename, test=True)\nval_dataset = FeynmanDataset(50, reprocess=False, filename=filename, val=True)\npred_dataset = FeynmanDataset(10, reprocess=False, filename=filename, pred=True)\nprint("Finished all!")\n'

In [9]:
dataset_indices= np.arange(dataset_size)
rnd.shuffle(dataset_indices)
dataset_indices=dataset_indices.tolist()

In [10]:
print("Loading datasets...")
#60:20:20 split for train:val:test
train_dataset = full_dataset[dataset_indices[0:int(dataset_size*3/5)]]
val_dataset = full_dataset[dataset_indices[int(dataset_size*3/5):int(dataset_size*4/5)]]
test_dataset = full_dataset[dataset_indices[int(dataset_size*4/5):dataset_size]]
pred_dataset = full_dataset[dataset_indices[-10:dataset_size]]
print("Finished all!")

Loading datasets...
Finished all!


In [11]:
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, num_workers=0)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, num_workers=0)
val_loader = DataLoader(dataset=val_dataset, batch_size=64, num_workers=0)
pred_loader = DataLoader(dataset=pred_dataset, batch_size=1) #keep this batch_size as one to get predictions to work

In [13]:
mean_y=0
var=0
for i in range(100):
  mean_y += train_dataset[i]['y_norm']
mean_y=mean_y/100

for i in range(500):
  var += (train_dataset[i]['y_norm']-mean_y)**2
var=var/500

In [14]:
print(var)

tensor(0.1388)


#**Some loss functions**
Defining some loss functions

In [15]:
class LogCoshLoss(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, y_t, y_prime_t):
        ey_t = y_t - y_prime_t
        return torch.mean(torch.log(torch.cosh(ey_t + 1e-12)))


In [16]:
class RMSLELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        
    def forward(self, pred, actual):
        return torch.sqrt(self.mse(torch.log(pred + 1), torch.log(actual + 1)))

In [17]:
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self, pred, actual):
      return torch.sqrt(self.mse(pred, actual))

In [18]:
class MyLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        self.L1 = nn.L1Loss()
        self.logcosh = LogCoshLoss()
    
    def forward(self, pred, actual):
      #calculate the variance of the predictions. Expected variance of actual is of the order of 0.13
      variance_pred = torch.mean((pred - torch.mean(pred))**2)
      variance_actual = torch.mean((actual - torch.mean(actual))**2)

      #reward the network with low loss if the variances are similar
      loss_value = self.L1(pred,actual) + 5*self.mse(pred,actual) #- variance_pred #+ 2*abs(variance_pred-0.13)
      return loss_value 

#**Training code and GNN model using Lightning Module**

* Lightning training module
* Uses Transformer convolution layer

In [19]:
class FeynModel(pl.LightningModule):
    def __init__(self, c_in, c_out, layer_name, model_params, filename='QED_data.csv'):
        """
        c_in = channels in (feature dimensions, e.g. RGB is 3)
        c_out = channels out (target dimension, e.g. classification is 1)
        """
        super().__init__()
        self.filename=filename
        self.batch_size = model_params["model_batch_size"]
        embedding_size = model_params["model_embedding_size"]
        n_heads = model_params["model_attention_heads"]
        self.n_layers = model_params["model_layers"]
        dropout_rate = model_params["model_dropout_rate"]
        top_k_ratio =  model_params["model_top_k_ratio"]
        self.top_k_every_n = model_params["model_top_k_every_n"]
        dense_neurons = model_params["model_dense_neurons"]
        edge_dim = model_params["model_edge_dim"]-3 #remove momenta from edge_attr
        edge_num = 5 #need to update this
                
        gnn_layer = gnn_layer_by_name[layer_name]
        self.lr = model_params["model_learning_rate"]
        self.weight_decay = model_params["model_weight_decay"]
        self.lin_dropout_prob = model_params["model_lin_dropout_prob"]
        self.save_hyperparameters()
        self.loss_fn = MyLoss()

        self.conv_layers = ModuleList([])
        self.transf_layers = ModuleList([])
        self.pooling_layers = ModuleList([])
        self.bn_layers = ModuleList([])

        # Transformation layer
        self.conv1 = gnn_layer(in_channels=c_in,
                               out_channels=embedding_size, 
                               heads=n_heads, 
                               dropout=dropout_rate,
                               edge_dim=edge_dim
                               ) 

        self.transf1 = Linear(embedding_size*n_heads, embedding_size)
        self.bn1 = BatchNorm1d(embedding_size)

        # Other layers
        for i in range(self.n_layers):
            self.conv_layers.append(gnn_layer(embedding_size, 
                                              embedding_size, 
                                              heads=n_heads, 
                                              dropout=dropout_rate,
                                              edge_dim=edge_dim,
                                              ))

            self.transf_layers.append(Linear(embedding_size*n_heads, embedding_size))
            self.bn_layers.append(BatchNorm1d(embedding_size))
            if i % self.top_k_every_n == 0:
                self.pooling_layers.append(TopKPooling(embedding_size, ratio=top_k_ratio))

        # Final layer
        self.conv_fin = gnn_layer(in_channels=embedding_size,
                               out_channels=1, 
                               heads=n_heads, 
                               dropout=dropout_rate,
                               edge_dim=edge_dim
                               ) 
            

        # Linear layers
        self.linear0 = Linear(embedding_size*2+3*2*edge_num, embedding_size*2)
        self.linear1 = Linear((embedding_size)*2, dense_neurons)
        self.linear2 = Linear(dense_neurons,dense_neurons)
        self.linear3 = Linear(dense_neurons, c_out)
        self.linear4 = Linear(embedding_size*2+3*2*edge_num, 1)

        """
        could use super node instead of topKPooling and linear layers
        or more topK pooling rather than linear layers
        """

    def forward(self, x, edge_index, edge_attr, batch_index):
        # Remove momenta from edge features
        p = edge_attr[:,8:11]
        #select just initial and final momenta 
        print(p.size())
        p = p.reshape(max(batch_index)+1,-1)
        edge_attr = edge_attr[:,0:8]

        # Initial transformation
        x = self.conv1(x, edge_index, edge_attr)
        x = F.leaky_relu(self.transf1(x))
        x = self.bn1(x)

        # Holds the intermediate graph representations
        global_representation = []

        for i in range(self.n_layers):
            x = self.conv_layers[i](x, edge_index, edge_attr)
            x = F.leaky_relu(self.transf_layers[i](x))
            x = self.bn_layers[i](x)
            # Always aggregate last layer
            if i % self.top_k_every_n == 0 or i == self.n_layers:
                x , edge_index, edge_attr, batch_index, _, _ = self.pooling_layers[int(i/self.top_k_every_n)](
                    x, edge_index, edge_attr, batch_index
                    )
                # Add current representation
                global_representation.append(torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1))
    
        x = sum(global_representation)

        #add momenta on
        x = torch.cat((x,p),1)

        # Output block

        x = F.relu(self.linear0(x))
        x = F.dropout(x,p=self.lin_dropout_prob, training=self.training)
        x = F.relu(self.linear1(x))
        x = F.dropout(x, p=self.lin_dropout_prob, training=self.training)
        x = F.relu(self.linear2(x))
        x = F.dropout(x, p=self.lin_dropout_prob, training=self.training)
        x = torch.sigmoid(self.linear3(x))

        return x
    
    def training_step(self, batch, batch_idx):
        x, edge_index, edge_attr, y = batch['x'], batch['edge_index'], batch['edge_attr'], batch['y_norm']
        batch_idx = batch['batch']
        y_hat = self(x,
                     edge_index,
                     edge_attr,
                     batch_idx
        )
        loss = self.loss_fn(y_hat, y.view(-1,1))
        self.log("train_loss", loss, prog_bar=True, on_step=True, on_epoch=False, batch_size=max(batch_idx)+1)
        return loss

    def validation_step(self, batch, batch_idx):
        x, edge_index, edge_attr, y = batch['x'], batch['edge_index'], batch['edge_attr'], batch['y_norm']
        batch_idx = batch['batch']
        y_hat = self(x,
                     edge_index,
                     edge_attr,
                     batch_idx
        )
        loss = self.loss_fn(y_hat, y.view(-1,1))
        self.log("val_loss", loss, prog_bar=True, on_step=False, on_epoch=True, batch_size=max(batch_idx)+1)
        return loss

    def test_step(self, batch, batch_idx):
        x, edge_index, edge_attr, y = batch['x'], batch['edge_index'], batch['edge_attr'], batch['y_norm']
        batch_idx = batch['batch']
        y_hat = self(x,
                     edge_index,
                     edge_attr,
                     batch_idx)
        loss = self.loss_fn(y_hat, y.view(-1,1))
        self.log("test_loss", loss, prog_bar=True, on_step=True, on_epoch=False, batch_size=max(batch_idx)+1)
        return loss

    def predict_step(self, batch, batch_idx):
        x, edge_index, edge_attr, y = batch['x'], batch['edge_index'], batch['edge_attr'], batch['y_norm']
        batch_idx = batch['batch']
        y_hat = self(x,
                     edge_index,
                     edge_attr,
                     batch_idx)
        return y_hat.item(), y.item()

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(),
                                lr=self.lr,
                                weight_decay=self.weight_decay,
                                )


In [20]:
def train_feyn_no_tune(params, num_gpus, num_epochs=10, lr_tune=False):
  """
  Function to train the Feynman GNN without a hyperparameter search.
  params = The hyperparameters to use, stored as a dictionary with the notation "model_..."
  """
  #need to make layer type a hyperparameter
  model_params = {k: v[0] for k, v in params.items() if k.startswith("model_")}
  model = FeynModel(c_in=-1, #train_dataset.num_node_features 
                    c_out=1,  #train_dataset.num_classes
                    layer_name="GAT",
                    model_params=model_params,
                    #filename=
                    )
  trainer = pl.Trainer(logger=TensorBoardLogger(CHECKPOINT_PATH, name="tb_logs"),
                       max_epochs=num_epochs,
                       gpus=math.ceil(num_gpus),
                       log_every_n_steps=10,
                       auto_lr_find=True,
                       #progress_bar_refresh_rate=0,
                       #callbacks=[EarlyStopping('val_loss')],
                       )
  if lr_tune==True:
    # Run learning rate finder
    lr_finder = trainer.tuner.lr_find(model, train_loader,val_loader,min_lr=1e-10, max_lr=1e-4)

    # Plot with
    fig = lr_finder.plot(suggest=True)
    fig.show()

    # Pick point based on plot, or get suggestion
    new_lr = lr_finder.suggestion()
    print(new_lr)

    model_params['model_learning_rate']=new_lr

  trainer.fit(model, train_loader, val_loader)
  trainer.validate(model, val_loader)
  trainer.test(model, test_loader)

  return model, trainer


In [21]:
print(f"Torch version: {torch.__version__}")
print(f"Cuda available: {torch.cuda.is_available()}")
print(f"Torch geometric version: {torch_geometric.__version__}")

if torch.cuda.is_available():
  gpus=1
else:
  gpus=0

Torch version: 1.10.0+cu111
Cuda available: False
Torch geometric version: 2.0.4


#**Create layer dictionary and Hyperparameters**


In [22]:
#layer name dictionary
gnn_layer_by_name = {
    "GCN": geom_nn.GCNConv,
    "GAT": geom_nn.GATConv,
    "GraphConv": geom_nn.GraphConv,
    "NNConv": geom_nn.NNConv,
    "RGCN": geom_nn.RGCNConv,
    "Trans": geom_nn.TransformerConv
}

#Hyperparameters to use if not tuning
HYPERPARAMETERS = {
    "model_batch_size": [80],
    "model_weight_decay": [0.000001],
    "model_learning_rate": [8.128305161640993e-10],
    "model_embedding_size": [2],
    "model_attention_heads": [2],
    "model_layers": [2],
    "model_dropout_rate": [0.7],
    "model_top_k_ratio": [0.6],
    "model_top_k_every_n": [1],
    "model_dense_neurons": [6],
    "model_edge_dim": [11],
    "model_lin_dropout_prob": [0.8],
    }

#Hyperparameters for ray tune to search through
config = {
    "model_batch_size": tune.choice([64]),
    "model_weight_decay": tune.choice([0.000001]),
    "model_learning_rate": tune.loguniform(0.0001,0.1),
    "model_embedding_size": tune.choice([4]),
    "model_attention_heads": tune.choice([4]),
    "model_layers": tune.choice([3]),
    "model_dropout_rate": tune.choice([0.5]),
    "model_top_k_ratio": tune.choice([0.2]),
    "model_top_k_every_n": tune.choice([1]),
    "model_dense_neurons": tune.choice([4]),
    "model_edge_dim": tune.choice([11]),
    "model_lin_dropout_prob": tune.choice([0.3]),
    }

In [None]:
model, trainer = train_feyn_no_tune(HYPERPARAMETERS,gpus,10,lr_tune=False)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  "A layer with UninitializedParameter was found. "

   | Name           | Type        | Params
------------------------------------------------
0  | loss_fn        | MyLoss      | 0     
1  | conv_layers    | ModuleList  | 112   
2  | transf_layers  | ModuleList  | 20    
3  | pooling_layers | ModuleList  | 4     
4  | bn_layers      | ModuleList  | 8     
5  | conv1          | GATConv     | 48    
6  | transf1        | Linear      | 10    
7  | bn1            | BatchNorm1d | 4     
8  | conv_fin       | GATConv     | 28    
9  | linear0        | Linear      | 140   
10 | linear1        | Linear      | 30    
11 | linear2        | Linear      | 42    
12 | linear3        | Linear      | 7     
13 | linear4        | Linear      | 35    
------------------------------------------------
488       Trainable params
0         Non-trainable params
488       Total params
0.002     To

Validation sanity check: 0it [00:00, ?it/s]

tensor([[   0.0000,    0.0000,  260.0260],
        [   0.0000,    0.0000, -260.0260],
        [   0.0000,    0.0000,    0.0000],
        ...,
        [   0.0000,    0.0000,    0.0000],
        [   0.4610,    0.0000,   -1.5786],
        [  -0.4610,    0.0000,    1.5786]])


Global seed set to 2434805482


tensor([[ 0.0000e+00,  0.0000e+00,  3.1003e+02],
        [ 0.0000e+00,  0.0000e+00, -3.1003e+02],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 1.3960e-01,  0.0000e+00,  3.9580e-02],
        [-1.3960e-01,  0.0000e+00, -3.9580e-02]])


Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


Validating: 0it [00:00, ?it/s]

Testing: 0it [00:00, ?it/s]

#**Predict with last model**

In [None]:
out = trainer.predict(model, dataloaders=pred_loader)
print("(preds, actual)\n",out)

#**TensorBoard Logs and running training**


In [None]:
%tensorboard --logdir /content/gdrive/MyDrive/Part_III_Project/saved_models/tb_logs

#**Tuning the hyperparameters with Ray Tune**

In [None]:
def train_feyn_tune(config, num_epochs=10, num_gpus=0, checkpoint_dir=None):
  """
  function to run a training run that will be called later by the tuning function
  """
  model = FeynModel(c_in=-1, #train_dataset.num_node_features 
                    c_out=1,  #train_dataset.num_classes
                    layer_name="GAT",
                    model_params=config,
                    #filename=
                    )
  trainer = pl.Trainer(logger=TensorBoardLogger(save_dir=tune.get_trial_dir(),
                                                name="",
                                                version="."),
                       max_epochs=num_epochs,
                       gpus=math.ceil(num_gpus),
                       log_every_n_steps=10,
                       #progress_bar_refresh_rate=0,
                       callbacks=[TuneReportCallback({"loss": "val_loss",   
                                                      #"mean_accuracy": "val_acc"
                                                      },
                                                     on="validation_end"),
                                  #EarlyStopping('val_loss',patience=10)
                                  ]
                       )
  trainer.fit(model, train_loader, val_loader)


def tune_feyn_asha(config, gpus_per_trial=0, num_epochs=10, num_samples=10):

    scheduler = ASHAScheduler(
        max_t=num_epochs,
        grace_period=1,
        reduction_factor=2)

    reporter = CLIReporter(
        parameter_columns=[
                           "model_batch_size",
                           "model_weight_decay",
                           "model_learning_rate",
                           "model_embedding_size",
                           "model_attention_heads",
                           "model_layers",
                           "model_dropout_rate",
                           "model_top_k_ratio",
                           "model_top_k_every_n",
                           "model_dense_neurons",
                           "model_edge_dim",
                           "model_lin_dropout_prob"],
        metric_columns=["loss", "training_iteration"])

    train_fn_with_parameters = tune.with_parameters(train_feyn_tune,
                                                    num_epochs=num_epochs,
                                                    num_gpus=gpus_per_trial,
                                                    #checkpoint_dir=CHECKPOINT_PATH,
                                                    )
    
    resources_per_trial = {"cpu": 1, "gpu": gpus_per_trial}

    analysis = tune.run(train_fn_with_parameters,
        resources_per_trial=resources_per_trial,
        metric="loss",
        mode="min",
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        name="tune_mnist_asha")

    print("Best hyperparameters found were: ", analysis.best_config)


In [None]:
tune_feyn_asha(config, gpus_per_trial=gpus)