In [None]:
# Pin the CUDA 11.1 builds of torch 1.10.0 / torchvision 0.11.0 / torchaudio 0.10.0;
# the PyG wheels installed in the next cell come from the index for this exact torch build.
!pip install torch==1.10.0+cu111 torchvision==0.11.0+cu111 torchaudio==0.10.0 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
 # Install required packages.
# PyG extension wheels are taken from the index built for torch 1.10.0 + CUDA 11.1.
!pip install -q torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.10.0+cu111.html
# NOTE(review): pytorch-lightning and torchmetrics are installed without a version pin —
# consider pinning releases known to work with torch 1.10 so the notebook stays reproducible.
!pip install -q pytorch-lightning
!pip install torchmetrics

In [None]:
import torch
import torch.nn.functional as F 
import torch_geometric.transforms as T
import torch.optim as optim
import torchmetrics
from torch.optim import lr_scheduler
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader
import torch_geometric.nn
from torch_geometric.nn import GraphConv, TopKPooling, SplineConv
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp

from sklearn.model_selection import KFold

import sys
import numpy as np
import math

import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

In [None]:
from math import sin, cos
import copy
import random

def rotation(graph, max_angle=None):
  """Rotate the first two edge-attribute columns of ``graph`` by one random angle.

  The angle (radians) is drawn uniformly from ``[-max_angle, max_angle]`` and the
  same 2x2 rotation matrix is applied to every row of ``graph.edge_attr[:, :2]``.
  Columns beyond the first two are left untouched.

  Args:
    graph: object exposing a 2-D ``edge_attr`` tensor with at least two columns.
    max_angle: half-width of the angle range. When ``None`` the module-level
      ``Theta`` setting is used, which is what existing callers rely on.

  Returns:
    The same ``graph`` object, with ``edge_attr[:, :2]`` overwritten in place.
  """
  if max_angle is None:
    max_angle = Theta
  theta = np.random.uniform(-max_angle, max_angle)
  a = np.array([[cos(theta), -sin(theta)],
                [sin(theta), cos(theta)]])

  coords = np.asarray(graph.edge_attr[:, :2])
  # A single matrix product computes row @ a for every row at once. Unlike
  # np.apply_along_axis it also accepts an input with zero rows.
  rotated = coords @ a
  graph.edge_attr[:, :2] = torch.from_numpy(rotated)
  return graph

def scaling(graph, max_scale=None):
  """Scale the first two edge-attribute columns of ``graph`` by random factors.

  Two independent factors are drawn uniformly between ``1/max_scale`` and
  ``max_scale``; the first multiplies column 0 and the second column 1 of
  ``graph.edge_attr``. Columns beyond the first two are left untouched.

  Args:
    graph: object exposing a 2-D ``edge_attr`` tensor with at least two columns.
    max_scale: upper bound of the scale range. When ``None`` the module-level
      ``S`` setting is used, which is what existing callers rely on.

  Returns:
    The same ``graph`` object, with ``edge_attr[:, :2]`` overwritten in place.
  """
  if max_scale is None:
    max_scale = S
  s = np.random.uniform(1/max_scale, max_scale, size=2)

  coords = np.asarray(graph.edge_attr[:, :2])
  # Multiplying by a diagonal matrix is a per-column multiply; broadcasting does
  # it for all rows at once and, unlike np.apply_along_axis, accepts zero rows.
  scaled = coords * s
  graph.edge_attr[:, :2] = torch.from_numpy(scaled)
  return graph

def translation(p, max_shift=None):
  """Shift a single 2-D point by a random offset, leaving the origin fixed.

  Args:
    p: sequence/array holding the two coordinates of one point.
    max_shift: each coordinate is moved by an independent uniform draw from
      ``[-max_shift, max_shift]``. When ``None`` the module-level ``TT``
      setting is used, which is what existing callers rely on.

  Returns:
    A NumPy array with the (possibly shifted) point. A point equal to (0, 0)
    is returned unchanged and consumes no random numbers.
  """
  point = np.asarray(p)
  if point[0] == 0 and point[1] == 0:
    return point
  if max_shift is None:
    max_shift = TT
  return point + np.random.uniform(-max_shift, max_shift, size=2)

# Augmentation ranges read by translation (TT), rotation (Theta) and scaling (S).
# The active values below make all three augmentations no-ops.
# Stronger alternative kept for reference: TT = 0.1, Theta = 0.6, S = 1.4

TT, Theta, S = 0, 0, 1

class my_affine_transforms(T.BaseTransform):
  """Random affine augmentation of the first two edge-attribute columns.

  Applies, in order, ``rotation``, ``scaling`` and a per-row ``translation`` to
  ``data.edge_attr[:, :2]``. The augmentation ranges come from the module-level
  ``Theta``, ``S`` and ``TT`` settings read by those helpers.
  """

  def __init__(self, args=None):
    # `args` is accepted for backward compatibility and is not used.
    # None replaces the previous mutable `[]` default.
    pass

  def __call__(self, data: torch_geometric.data.data.Data):
    """Return ``data`` with a randomly transformed copy of its edge attributes."""
    # Work on a private copy of the edge attributes instead of writing into the
    # tensor that was handed in.
    # NOTE(review): PyG in-memory datasets appear to return tensors that share
    # storage with the cached dataset, so in-place writes would make the random
    # augmentations accumulate from one access to the next — confirm against
    # the installed PyG version.
    data.edge_attr = data.edge_attr.clone()

    data = rotation(data)
    data = scaling(data)

    coords = data.edge_attr[:, :2]
    # np.apply_along_axis raises on an input with zero rows, so skip the
    # translation step for graphs without edges.
    if coords.shape[0] > 0:
      data.edge_attr[:, :2] = torch.from_numpy(np.apply_along_axis(translation, 1, coords))
    return data

  def __repr__(self) -> str:
    return (f'{self.__class__.__name__}')

In [None]:
class my_normalization(T.BaseTransform):
  """Min-max normalise the first two edge-attribute columns of a graph.

  Columns 0 and 1 of ``data.edge_attr`` are each rescaled independently, per
  graph, so that their minimum maps to 0 and their maximum to 1. Any further
  columns are left unchanged.
  """

  def __init__(self, args=None):
    # `args` is accepted for backward compatibility and is not used.
    # None replaces the previous mutable `[]` default.
    pass

  @staticmethod
  def my_norm(x, a, b):
    """Map ``x`` linearly so that ``a`` becomes 0 and ``b`` becomes 1.

    The caller must ensure ``a != b``; otherwise the division is by zero.
    """
    return (x-a)/(b-a)

  def __call__(self, data: torch_geometric.data.data.Data):
    """Return ``data`` with a normalised copy of its edge attributes."""
    # Build the result in a fresh tensor rather than writing into the tensor
    # that was handed in, so the caller's original values are never modified.
    edge_attr = data.edge_attr.clone()

    # min()/max() are undefined for zero rows, so leave such graphs untouched.
    if edge_attr.shape[0] > 0:
      for col in (0, 1):
        values = edge_attr[:, col]
        low, high = values.min(), values.max()
        if high > low:
          edge_attr[:, col] = my_normalization.my_norm(values, low, high)
        else:
          # A constant column has zero range; map it to 0 instead of producing
          # NaN from a 0/0 division.
          edge_attr[:, col] = 0

    data.edge_attr = edge_attr
    return data

  def __repr__(self) -> str:
    # No trailing comma: matches the repr format of my_affine_transforms.
    return (f'{self.__class__.__name__}')

In [None]:
# Cuneiform graphs from the TU collection; every graph passes through
# my_normalization when it is accessed (no augmentation here).
dataset = TUDataset(
    root='data/TUDataset',
    name='Cuneiform',
    transform=T.Compose([my_normalization()]),
)

In [None]:
class Net(pl.LightningModule):
    """SplineConv graph classifier trained with PyTorch Lightning.

    Architecture: three SplineConv layers (32, 64, 64 channels) with ELU
    activations and dropout, global mean pooling over each graph, dropout, and
    a linear layer followed by log-softmax.

    Layer sizes are read from the module-level ``dataset`` (node-feature count,
    edge-feature count and number of classes), so ``dataset`` must be defined
    before the model is constructed.
    """

    def __init__(self):
        super(Net, self).__init__()

        kernel_size = 5
        dim = dataset.num_edge_features
        degree = 1

        self.conv1 = SplineConv(dataset.num_features, 32, dim=dim, kernel_size=kernel_size, degree=degree)
        self.conv2 = SplineConv(32, 64, dim=dim, kernel_size=kernel_size, degree=degree)
        self.conv3 = SplineConv(64, 64, dim=dim, kernel_size=kernel_size, degree=degree)

        self.lin1 = torch.nn.Linear(64, dataset.num_classes)

        # Dropout probability applied after pooling, before the classifier.
        self.dropout = 0.2

        # One metric object per stage so their accumulated states never mix.
        self.train_accuracy = torchmetrics.Accuracy()
        self.val_accuracy = torchmetrics.Accuracy()
        self.test_accuracy = torchmetrics.Accuracy()

    def forward(self, data):
        """Return per-graph log-probabilities of shape (num_graphs, num_classes)."""
        x, edge_index, batch = data.x, data.edge_index, data.batch
        pseudo = data.edge_attr

        # Dropout probability between the convolution layers.
        in_dropout = 0.2

        x = F.elu(self.conv1(x, edge_index, pseudo))
        x = F.dropout(x, in_dropout, training=self.training)
        x = F.elu(self.conv2(x, edge_index, pseudo))
        x = F.dropout(x, in_dropout, training=self.training)
        x = F.elu(self.conv3(x, edge_index, pseudo))

        # Average node embeddings within each graph of the batch.
        x = gap(x, batch)

        x = F.dropout(x, self.dropout, training=self.training)

        x = F.log_softmax(self.lin1(x), dim=1)
        return x

    def _shared_step(self, batch, loss_name, acc_name, accuracy):
        """Run one batch, log its loss and accuracy, and return the loss."""
        output = self(batch)
        # forward() already returns log-probabilities, so the matching loss is
        # NLL; cross_entropy would apply a second, redundant log-softmax.
        loss = F.nll_loss(output, batch.y)
        self.log(loss_name, loss, on_epoch=True, on_step=False)
        # Calling the metric returns the batch accuracy and also accumulates
        # state for the epoch-level value.
        self.log(acc_name, accuracy(output, batch.y))
        return loss

    def _log_epoch_accuracy(self, name, accuracy):
        """Log the accuracy accumulated during the epoch, then clear it."""
        self.log(name, accuracy.compute())
        # compute() does not clear the metric state. Without reset() the value
        # would be a running average over every epoch seen so far.
        accuracy.reset()

    def training_step(self, batch, batch_idx):
        return self._shared_step(batch, 'training_loss', 'train_acc_step', self.train_accuracy)

    def on_train_epoch_end(self):
        self._log_epoch_accuracy('train_acc_epoch', self.train_accuracy)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.01)
        # "an initial learning rate of 0.01 and learning rate decay to 0.001 after 200 epochs"
        scheduler = lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=[200], gamma=0.1)
        return {
            "optimizer": optimizer,
            "lr_scheduler": scheduler
        }

    def validation_step(self, batch, batch_idx):
        return self._shared_step(batch, 'val_loss', 'val_acc_step', self.val_accuracy)

    def on_validation_epoch_end(self):
        self._log_epoch_accuracy('val_acc_epoch', self.val_accuracy)

    def test_step(self, batch, batch_idx):
        return self._shared_step(batch, 'test_loss', 'test_acc_step', self.test_accuracy)

    def on_test_epoch_end(self):
        self._log_epoch_accuracy('test_acc_epoch', self.test_accuracy)

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
# Load the TensorBoard notebook extension and open it on the directory the
# training cell's TensorBoardLogger writes to.
%load_ext tensorboard
%tensorboard --logdir lightning_logs/

In [None]:
import numpy as np
from datetime import datetime
# Results file for this run, named after the current timestamp.
p = f"/content/drive/My Drive/results/{datetime.now()}.txt"

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the results file and the copied
# checkpoints/logs of the training cell are written underneath it.
drive.mount('/content/drive', force_remount=True)

In [None]:
!rm -r /content/checkpoints/
k_folds = 10
max_epochs = 300
# 20000
# 300
results = []
tstamps = []
batch_size = 64

TT = 0.1;
Theta = 0.06;
S = 2;  

# TT = 0.1;
# Theta = 0.6;
# S = 1.04;


early_stopping = False
np_random = True
kfold_random = True

# kfold = KFold(n_splits=k_folds, shuffle=kfold_random, random_state=0)
kfold = KFold(n_splits=k_folds, shuffle=kfold_random)

dataset = TUDataset(root='data/TUDataset', name='Cuneiform', transform=T.Compose([my_normalization()]))
augmentation_dataset = TUDataset(root='data/TUDataset', name='Cuneiform', transform=T.Compose([my_affine_transforms(), my_normalization()]))

# np.random.seed(12345)
model = None

for i in range(1,11):
  results.append({})
  tstamps.append([])
for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
  # if fold != 0:
  #   continue
  # Print
  print(f'FOLD {fold}')
  print('--------------------------------')

  tstamps[fold].append(datetime.now())

  del model 
  model = Net()

  if early_stopping:
    if np_random:
      np.random.shuffle(train_ids)
    
    val_size = len(train_ids) // 15
    val_ids = list(train_ids[:val_size])
    train_ids = list(train_ids[val_size:])
    val_loader = DataLoader(dataset[val_ids], batch_size=batch_size)

    dataset = TUDataset(root='data/TUDataset', name='Cuneiform', transform=T.Compose([my_normalization()]))
    # augmentation_dataset = TUDataset(root='data/TUDataset', name='Cuneiform', transform=T.Compose([my_affine_transforms(), my_normalization()]))

    test_loader = DataLoader(dataset[test_ids], batch_size=batch_size)
    # val_loader = test_loader # DataLoader(dataset[test_ids], batch_size=batch_size)
    train_loader = DataLoader(dataset[train_ids], batch_size=batch_size)
    augmented_train_loader = DataLoader(augmentation_dataset[train_ids], batch_size=batch_size)

    # model.apply(reset_weights)    
    early_stop_callback = EarlyStopping(monitor="val_acc_epoch", 
                                        min_delta=0.00, 
                                        patience=1000, 
                                        verbose=False, 
                                        mode="max")
    checkpoint_callback = ModelCheckpoint(dirpath="checkpoints",
                                          filename="best-checkpoint-fold-"+str(fold),
                                          save_top_k=1,
                                          verbose=False,
                                          monitor="val_acc_epoch",
                                          mode="max",
                                          every_n_epochs = 1,
                                          save_last=True)

    logger = TensorBoardLogger("lightning_logs", name="model", default_hp_metric= False)
    trainer = pl.Trainer(gpus=1, # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        logger=logger,
                        callbacks=[early_stop_callback, checkpoint_callback], 
                        # callbacks=[checkpoint_callback], 
                        check_val_every_n_epoch=1,
                        max_epochs=max_epochs)
    trainer.fit(model=model, train_dataloaders=train_loader, val_dataloaders=val_loader)
    
    checkpoint = torch.load("/content/checkpoints/best-checkpoint-fold-"+str(fold)+".ckpt")
    model.load_state_dict(checkpoint['state_dict'])  
  else:  
    if np_random:
      np.random.shuffle(train_ids)

    test_loader = DataLoader(dataset[test_ids], batch_size=batch_size)
    train_loader = DataLoader(dataset[train_ids], batch_size=batch_size)
    augmented_train_loader = DataLoader(augmentation_dataset, batch_size=batch_size)

    model.apply(reset_weights)

    logger = TensorBoardLogger("lightning_logs", name="model", default_hp_metric= False)
    trainer = pl.Trainer(gpus=1,# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!11 
                         logger=logger, 
                         check_val_every_n_epoch=1, 
                         max_epochs=max_epochs,
                         replace_sampler_ddp=True,
                         )

    trainer.fit(model=model, train_dataloaders=train_loader)

    # trainer.fit(model=model, train_dataloader=augmented_train_loader)
  # ---------------------- #  

  # Завантажує кращу модель у фолді, і проводить тест на ній.
    
  tstamps[fold].append(datetime.now())
  # results.append({})
  results[fold]['epoch'] = model.current_epoch
  results[fold]['accuracy'] = trainer.test(model=model, dataloaders=test_loader)
  print(results[fold])
  with open(p, 'w') as f:
    f.write(str(results))
    f.write("\n\n")
    f.write(str(tstamps))
  # if fold == 0:
  #   break;

!cp -r /content/checkpoints/ /content/drive/MyDrive/Masters/checkpoints+$(date +"%Y-%m-%d-%T")
!cp -r lightning_logs/model/ /content/drive/MyDrive/Masters/lightning_logs+$(date +"%Y-%m-%d-%T")

In [None]:
# List the GPUs attached to this runtime.
!nvidia-smi -L

In [None]:
# Show the full nvidia-smi status report.
!nvidia-smi