In [1]:
## Mention the required packages here ##
import os
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

##[Important] Please Read##
1.  Download the folder: https://drive.google.com/drive/folders/16pnVqej_NNPEw8DHg5XslY6nmAbzzDLB?usp=sharing
2. Upload it in your personal drive
3. You can push your caller functions to dev-madhura branch. 
4. In the coming cells, we clone the repo.
5. So you can refer to the ".pkl" files which are uploaded in the folder of step 1 by using the function calls from the repo
6. Ask me if you run into queries. 

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
! git clone -b dev-madhura https://github.com/PalAvik/lrgb-uva-dl2-11.git

Cloning into 'lrgb-uva-dl2-11'...
remote: Enumerating objects: 961, done.[K
remote: Counting objects: 100% (400/400), done.[K
remote: Compressing objects: 100% (227/227), done.[K
remote: Total 961 (delta 227), reused 310 (delta 162), pack-reused 561[K
Receiving objects: 100% (961/961), 30.35 MiB | 23.95 MiB/s, done.
Resolving deltas: 100% (520/520), done.


In [None]:
RESULTS_FOLDER_PATH = "/content/gdrive/MyDrive/DL2_team11/results"
MODEL_CHECKPOINTS_PASCAL = "{}/model checkpoints/{}".format(RESULTS_FOLDER_PATH, "pascal")
MODEL_CHECKPOINTS_COCO = "{}/model checkpoints/{}".format(RESULTS_FOLDER_PATH, "coco")

In [None]:
def get_model_name(name):
    dict_ = {
         "EGNN": "E(n)-Equivariant",
         "ENN": "E(n)-Invariant",
         "GCN": name,
         "Transformer+LapPE": name,
         "SCGNN": "E(3) Steerable"
    }
    return dict_[name]
def read_log_file(log_file):
    with open(log_file) as f:
      f = f.readlines()
      f_best = [f_line for f_line in f if "best so far" in f_line.lower()][-1]   
      f_param = [f_line for f_line in f if "params" in f_line.lower()][-1] 
      train_f1 = f_best.split("train_f1:")[-1].split()[0]
      val_f1 = f_best.split("val_f1:")[-1].split()[0]
      test_f1 = f_best.split("test_f1:")[-1].split()[0]
      params = f_param.split("'params':")[-1].split()[0].replace(",","")
      params = str(int(int(params)/1000)) + "k"
      return {"train":train_f1, "val":val_f1, "test":test_f1, "params":str(params)}



def get_statistics(model_path):
   dict_table = {"Model":[], "Params":[], "Best Train F1":[],
                "Best Val F1":[], "Best Test F1":[]}
   for result_folder in os.listdir(model_path):
       log_file_path = "{}/{}/0/logging.log".format(model_path,result_folder)
       if os.path.exists(log_file_path):
          stats = read_log_file(log_file_path)
          model_name = get_model_name(result_folder.split("-")[-1])
          dict_table["Model"].append(model_name)
          dict_table["Best Train F1"].append(stats["train"])
          dict_table["Best Val F1"].append(stats["val"])
          dict_table["Best Test F1"].append(stats["test"])
          dict_table["Params"].append(stats["params"])
   df = pd.DataFrame(data=dict_table) 
   df = df.sort_values(by=['Best Test F1'])
    #  df = df.style.applymap("green", subset=['Best Test F1'])
   if "pascal" in model_path: cmap = 'YlGn'
   else: cmap = 'Blues'
   df = df.style.background_gradient(axis=0, gmap=df['Best Test F1'], cmap=cmap)
   return df



## Results of trained models ##

In [None]:
df_pascal = get_statistics(MODEL_CHECKPOINTS_PASCAL)

In [None]:
df_pascal

Unnamed: 0,Model,Params,Best Train F1,Best Val F1,Best Test F1
2,GCN,495k,0.4152,0.1561,0.1594
1,E(n)-Invariant,522k,0.3608,0.2173,0.2223
0,E(n)-Equivariant,522k,0.3767,0.2434,0.2515
4,E(3) Steerable,590k,0.4071,0.2559,0.2599
3,Transformer+LapPE,501k,0.8115,0.2747,0.265


In [None]:
df_coco = get_statistics(MODEL_CHECKPOINTS_COCO)

In [None]:
df_coco

Unnamed: 0,Model,Params,Best Train F1,Best Val F1,Best Test F1
1,GCN,495k,0.3114,0.1912,0.1978
2,E(n)-Invariant,522k,0.371,0.2829,0.2785
0,E(n)-Equivariant,522k,0.4473,0.3126,0.3038
3,Transformer+LapPE,501k,0.5303,0.3372,0.3114
4,E(3) Steerable,590k,0.362,0.3126,0.316


## Rewiring ##

## Influence Scores ##

In [6]:
from ..lrgb-uva-dl2-11.analysis.influence import process_all_graphs, plot_mean_influence_by_distance

MODEL_CHECKPOINT_GCN= "{}/influence scores/{}".format(RESULTS_FOLDER_PATH, "inf_scores_gcn_with_adj.pkl") 
influence_df_gcn = process_all_graphs(MODEL_CHECKPOINT_GCN, normalise=True)

fig, ax = plt.subplots()
plot_mean_influence_by_distance(influence_df_gcn, ax, 'GCN')

ax.set_xlabel('Shortest path distance from target node')
ax.set_ylabel('Proportion of total gradient')
ax.legend()

ImportError: ignored

## Noise Experiments ##

## Amity Experiments ##

### Model accuracies comparison 

### Bottlenecking in the graphs

## Conclusion ##