# Profiling summary
* Ivy Bridge (`iv24`) outperforms Knights Landing (`knl`) in total runtime: about 499 s versus 3355 s, i.e. roughly 6.7x faster
* On both platforms, `layer_norm` is the third most expensive operation by internal time, after backward propagation (`run_backward`) and `scatter_add_`

In [10]:
import cProfile as p
import pstats

# Load the saved profiler dumps: one pstats.Stats object per hardware target,
# keyed by the short platform name used throughout the notebook.
st = dict(
    iv24=pstats.Stats('results/gnn_profile_output_iv24'),
    knl=pstats.Stats('results/gnn_profile_output_knl'),
)

In [11]:
# Report the total profiled time (total_tt, in seconds) for each hardware target.
# The label previously read "Hardwire", a typo for "Hardware".
for hardware, stats in st.items():
    print("Hardware: {}, Total time: {}".format(hardware, stats.total_tt))

Hardwire: iv24, Total time: 499.2061369999893
Hardwire: knl, Total time: 3355.2917809999926


In [6]:
# Short handles for the two per-platform Stats objects stored in `st`.
iv24, knl = st["iv24"], st["knl"]

Tue Oct 29 23:26:57 2019    results/gnn_profile_output_knl

         10029108 function calls (9886043 primitive calls) in 3355.292 seconds

   Ordered by: internal time
   List reduced from 9579 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      250 2437.049    9.748 2437.049    9.748 {method 'run_backward' of 'torch._C._EngineBase' objects}
     6000  490.209    0.082  490.209    0.082 {method 'scatter_add_' of 'torch._C._TensorBase' objects}
    22500  183.695    0.008  183.695    0.008 {built-in method layer_norm}
     9750   78.782    0.008   78.782    0.008 {built-in method cat}
    25875   20.474    0.001   20.474    0.001 {built-in method addmm}
     3375   16.368    0.005  230.170    0.068 agnn.py:33(forward)
    22500   15.581    0.001   15.581    0.001 {built-in method tanh}
     3000   11.375    0.004  592.542    0.198 agnn.py:56(forward)
     8764    7.868    0.001    7.868    0.001 {built-in method posix.stat}
  205

<pstats.Stats at 0x1116b9208>

## KNL stats

In [7]:
# Top 10 KNL profile entries, ranked by internal time ("tottime"),
# with directory prefixes stripped from the file names.
(
    knl
    .strip_dirs()
    .sort_stats("tottime")
    .print_stats(10)
)

Tue Oct 29 23:26:57 2019    results/gnn_profile_output_knl

         10029108 function calls (9886043 primitive calls) in 3355.292 seconds

   Ordered by: internal time
   List reduced from 9579 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      250 2437.049    9.748 2437.049    9.748 {method 'run_backward' of 'torch._C._EngineBase' objects}
     6000  490.209    0.082  490.209    0.082 {method 'scatter_add_' of 'torch._C._TensorBase' objects}
    22500  183.695    0.008  183.695    0.008 {built-in method layer_norm}
     9750   78.782    0.008   78.782    0.008 {built-in method cat}
    25875   20.474    0.001   20.474    0.001 {built-in method addmm}
     3375   16.368    0.005  230.170    0.068 agnn.py:33(forward)
    22500   15.581    0.001   15.581    0.001 {built-in method tanh}
     3000   11.375    0.004  592.542    0.198 agnn.py:56(forward)
     8764    7.868    0.001    7.868    0.001 {built-in method posix.stat}
  205

<pstats.Stats at 0x1116b9208>

## Ivy Bridge (iv24) stats

In [9]:
# Top 10 iv24 profile entries, ranked by internal time ("tottime"),
# with directory prefixes stripped from the file names.
(
    iv24
    .strip_dirs()
    .sort_stats("tottime")
    .print_stats(10)
)

Tue Oct 29 23:26:57 2019    results/gnn_profile_output_iv24

         10213373 function calls (10070319 primitive calls) in 499.206 seconds

   Ordered by: internal time
   List reduced from 9557 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      250  330.024    1.320  330.024    1.320 {method 'run_backward' of 'torch._C._EngineBase' objects}
     6000   51.162    0.009   51.162    0.009 {method 'scatter_add_' of 'torch._C._TensorBase' objects}
    22500   44.128    0.002   44.128    0.002 {built-in method layer_norm}
     9750   22.587    0.002   22.587    0.002 {built-in method cat}
    25875   14.368    0.001   14.368    0.001 {built-in method addmm}
    22500    7.595    0.000    7.595    0.000 {built-in method tanh}
     3375    5.293    0.002   67.607    0.020 agnn.py:33(forward)
     3000    3.720    0.001   80.227    0.027 agnn.py:56(forward)
  205/204    1.694    0.008    1.696    0.008 {built-in method _imp.create_dyna

<pstats.Stats at 0x110ec0c50>

## Configuration of the experiments

* Launch command  
```
srun -N 1 -n 1 --ntasks-per-node 1 python -m cProfile -s time -o profile_output train.py configs/agnn-sr.yaml
```
* Training records: 200
* Validation records: 100 
* Training n_epochs: 5
