# Perfomance Analysis

In [23]:
import pandas as pd

## Sequential Model

In [24]:
LOG_FILE = '../log/seq.csv'

df = pd.read_csv(LOG_FILE)
df['phase'] = df.replace({'phase': {0: 'train', 1: 'val'}})['phase']
df.head(n=10)

Unnamed: 0,epoch,batch,batch_time(s),phase
0,0,0,25.97975,train
1,0,1,26.016988,train
2,0,2,26.434223,train
3,0,3,25.707898,train
4,0,4,25.592377,train
5,0,5,25.641662,train
6,0,6,25.612364,train
7,0,7,25.624064,train
8,0,8,25.591709,train
9,0,9,25.61215,train


In [25]:
df[df['phase'] == 'train']['batch_time(s)'].describe()

count    250.000000
mean      26.080359
std        0.344749
min       25.577825
25%       25.981775
50%       26.037149
75%       26.124101
max       28.670659
Name: batch_time(s), dtype: float64

In [26]:
df[df['phase'] == 'val']['batch_time(s)'].describe()

count    50.000000
mean     10.234476
std       0.191090
min      10.128167
25%      10.179445
50%      10.208933
75%      10.232612
max      11.475606
Name: batch_time(s), dtype: float64

In [34]:
df.groupby('epoch')['batch_time(s)'].sum().describe()

count       5.000000
mean     1406.362735
std         4.620646
min      1401.187116
25%      1402.878305
50%      1405.472299
75%      1410.945040
max      1411.330913
Name: batch_time(s), dtype: float64

## Pipeline Model

It is necessary to study the computational time and space of the different types of layers before splitting the model. Considering that the studied model is composed of GATConv, Linear layer and Global Pool (for the graph embedding), the studies will be carried out on these three types of layers

In [17]:
from torchvision.datasets import CIFAR10
import torchvision.transforms as T
import torch
import tracemalloc
import time
import torch.nn.functional as F
from torch_geometric.nn import GATConv, global_add_pool
from torch_geometric.loader import DataLoader

from data import CustomDataset, image_to_graph

In [10]:
transform = T.ToTensor() #TODO: add the super pixel part

train_dataset = CIFAR10(root='../data', train=True, download=False, transform=transform)

dataset = CustomDataset(image_to_graph(train_dataset), length=100)

loader = DataLoader(dataset, batch_size=50, shuffle=True)

## GATConv Layer

### 3x320 

In [18]:
layer = GATConv(3, 320, heads=1)

tracemalloc.start()

data = next(iter(loader))

start_time = time.time()

output = layer(data.x, data.edge_index)
output = F.relu(output)

end_time = time.time()

current, peak = tracemalloc.get_traced_memory()

print(f"Time: {end_time - start_time:.2f} s")
print(f"Current memory usage: {current / 1024 ** 2:.2f} MB")
print(f"Peak memory usage: {peak / 1024 ** 2:.2f} MB")

tracemalloc.stop()

Time: 0.49 s
Current memory usage: 0.01 MB
Peak memory usage: 0.02 MB


### 320x320

### Global Add Pool

### Linear Layer