In [1]:
import pandas as pd
import numpy as np
import json
import seaborn as sns
import matplotlib.pyplot as plt
import os
# import validators
import requests
from tqdm import tqdm
from treelib import Node, Tree

- Some remaining questions:
    - Can the **dataset** and **paper** fields, which are apparently given only as strings, be traced back unambiguously to their URLs?
    - Is there an overlap between the `model_name` values and `method`?


In [2]:
# JSON text is UTF-8 by specification; pass the encoding explicitly so that
# decoding does not depend on the locale default of the machine running this.
with open('/home/ls3data/datasets/paperswithcode/evaluation-tables.json', encoding='utf-8') as f:
    evaluation_tables = json.load(f)

## 1. Tasks and Subtasks (sub-subtasks etc.)

### 1.1 Structure - tree / loop?

In [3]:
def get_all_relations(task_list):
    """Collect every [parent name, child name] pair in a nested task list.

    Each entry of ``task_list`` is read through its 'task' and 'subtasks'
    keys. For every entry, the pairs found deeper inside its subtasks come
    first, followed by the pairs linking the entry to its direct subtasks.

    Returns a flat list of two-element lists ``[parent, child]``.
    """
    relations = []
    for node in task_list:
        children = node['subtasks']
        # Deeper generations first, then this node's own children.
        relations.extend(get_all_relations(children))
        relations.extend([node['task'], child['task']] for child in children)
    return relations

# get all tasks (the first level in evaluation_tables)
def get_all_tasks(evaluation_tables):
    """Return the 'task' value of every top-level entry, in input order."""
    return [entry["task"] for entry in evaluation_tables]

# Names of the top-level entries of evaluation_tables (the "first level").
tasks = get_all_tasks(evaluation_tables)
# Every [parent, child] name pair found anywhere in the nested 'subtasks' lists.
parents_children= get_all_relations(evaluation_tables)
# len(parents_children)

print(f"there are {len(tasks)} tasks (the first level) in total \n (some of them are the subtasks of others, but 'evaluation_tables' stored them on the same level)")

there are 1280 tasks (the first level) in total 
 (some of them are the subtasks of others, but 'evaluation_tables' stored them on the same level)


In [4]:
# note that tasks[1075] is empty, so drop it.
# Filtering (instead of tasks.remove("")) keeps this cell idempotent:
# re-running it no longer raises ValueError once the empty name is gone.
tasks = [task for task in tasks if task != ""]

# now there are 1279 tasks
len(tasks)

1279

In [5]:
# always keep the following code in the same cell as the function "create_nodes_for_next_level"
# Build a tree with an artificial root "task_tree" and one node per first-level
# task directly below it; the task name is passed as both of the first two
# arguments of create_node.
tree = Tree()
tree.create_node("task_tree", "task_tree")
for task in tasks:
    tree.create_node(task, task, parent="task_tree")

def create_nodes_for_next_level(combis, tree, first_level_tasks=None):
    """Attach the children of first-level tasks to ``tree``.

    Parameters
    ----------
    combis : iterable of [parent, child]
        Parent/child name pairs ("combi" means one such pair). Pairs with an
        empty parent or child name are ignored.
    tree : object with ``create_node(tag, identifier, parent=...)``
        The tree that receives the new nodes; it is modified in place.
    first_level_tasks : iterable of str, optional
        Names treated as first-level parents. Defaults to the notebook-level
        list ``tasks``, which is what the original implementation always used.

    Returns
    -------
    (next_level_combis, repeated_children)
        ``next_level_combis``: pairs whose parent is not a first-level task.
        ``repeated_children``: pairs for which ``tree.create_node`` raised,
        e.g. because the child already has a node or the parent has none.
    """
    if first_level_tasks is None:
        # Backward-compatible default: the global list defined earlier in the notebook.
        first_level_tasks = tasks
    # Set membership avoids scanning the whole task list once per pair.
    first_level = set(first_level_tasks)

    repeated_children = []
    next_level_combis = []
    for combi in combis:
        parent, child = combi[0], combi[1]

        if parent == "" or child == "":
            continue

        if parent not in first_level:
            next_level_combis.append(combi)
            continue

        # Try to create the node for this "second level" task.
        try:
            tree.create_node(child, child, parent=parent)
        except Exception:
            # Record the pair instead of failing. ``Exception`` rather than a
            # bare ``except`` so that KeyboardInterrupt/SystemExit still
            # propagate and a long run can be interrupted.
            repeated_children.append(combi)

    return next_level_combis, repeated_children

# Single pass over all [parent, child] pairs; children are attached to the one-generation tree.
next_level_combis, repeated_children = create_nodes_for_next_level(parents_children, tree)

In [6]:
# the result is empty: every parent in "parents_children" is one of the first-level tasks of "evaluation_tables"
next_level_combis

[]

### 1.1.1 Tree structure with "one generation"

In [7]:
# A "one-generation" tree with the 1279 first-level tasks (the empty name was removed) as parent nodes
tree.show()

task_tree
├── 2D Human Pose Estimation
│   └── Semi-Supervised Human Pose Estimation
├── 2D Object Detection
│   ├── Defect Detection
│   ├── Hybrid Positioning
│   ├── Low-light Pedestrian Detection
│   ├── Semi-Supervised Person Bounding Box Detection
│   └── Webpage Object Detection
├── 2D Semantic Segmentation
├── 3D
│   ├── 3D Feature Matching
│   ├── 3D Geometry Perception
│   ├── 3D Object Retrieval
│   ├── 3D Plane Detection
│   ├── 3D Point Cloud Matching
│   ├── 3D Shape Classification
│   ├── 3D Shape Recognition
│   ├── 3D Volumetric Reconstruction
│   ├── Classify 3D Point Clouds
│   ├── Generating 3D Point Clouds
│   ├── Multi-View 3D Shape Retrieval
│   ├── Neural Rendering
│   ├── Point Set Upsampling
│   └── Underwater 3D Scene Reconstruction
├── 3D Absolute Human Pose Estimation
├── 3D Action Recognition
│   ├── Generalized Zero Shot skeletal action recognition
│   └── Zero Shot Skeletal Action Recognition
├── 3D Bin Packing
├── 3D Canonical Hand Pose Estimation
├── 3

- **First conclusions:**
    1. all tasks (including subtasks) in the ``evaluation_tables`` can actually be represented in the above "one generation" tree structure (every parent that occurs in a parent–child pair is one of the 1279 first-level tasks in ``evaluation_tables``)
    2. however, some tasks on this first level are themselves children of other tasks; more precisely, these are the children recorded in ``repeated_children`` (analysed further below)

### 1.1.2 Tree structure with "multiple generations"

In [8]:
# get the children that already exist on the first level:
# the second column of repeated_children holds the child name of each pair
repeated_children_names = np.array(repeated_children)[:,1]

# some names show up more than once (as children):
# this phenomenon will be analysed later...
len(set(repeated_children_names)) < len(repeated_children_names)

True

In [9]:
# drop the first-level tasks that also occur as a repeated child
tasks_no_repetition = list(set(tasks) - set(repeated_children_names))

# always keep the following code in the same cell as the function "create_nodes_for_next_level"
tree_multi_level = Tree()
tree_multi_level.create_node("task_tree", "task_tree")
for task in tasks_no_repetition:
    tree_multi_level.create_node(task, task, parent="task_tree")

next_level_combis, children_with_no_nodes = create_nodes_for_next_level(parents_children, tree_multi_level)

# next_level_combis is empty after calling "create_nodes_for_next_level" only once;
# pairs for which no Node could be created are collected in children_with_no_nodes
next_level_combis

[]

In [10]:
## a few tasks in "children_with_no_nodes" could actually still be given Nodes in the tree;
## I'll skip those since the detailed tree structure does not influence the conclusion

#a, b = create_nodes_for_next_level(children_with_no_nodes, tree_multi_level)
#c, d = create_nodes_for_next_level(b, tree_multi_level)

In [11]:
# a tree with multiple levels:
# one could find an example such as the first task: "Optical Character Recognition",
# now all its subtasks are in the tree
tree_multi_level.show()

task_tree
├── 2D Semantic Segmentation
├── 3D
│   ├── 3D FACE MODELING
│   │   └── Facial Recognition and Modelling
│   │       ├── Action Unit Detection
│   │       ├── Age And Gender Classification
│   │       ├── Age Estimation
│   │       ├── Face Alignment
│   │       ├── Face Detection
│   │       │   └── Occluded Face Detection
│   │       ├── Face Generation
│   │       │   ├── Face Age Editing
│   │       │   ├── Talking Face Generation
│   │       │   │   └── Constrained Lip-synchronization
│   │       │   └── Talking Head Generation
│   │       │       └── Unconstrained Lip-synchronization
│   │       ├── Face Hallucination
│   │       ├── Face Identification
│   │       ├── Face Recognition
│   │       │   ├── Age-Invariant Face Recognition
│   │       │   ├── Face Quality Assessement
│   │       │   └── MASKED FACE RECOGNITION
│   │       ├── Face Reconstruction
│   │       ├── Face Swapping
│   │       ├── Facial Action Unit Detection
│   │       ├── Facial Attribute Clas

- **Second Conclusion**:
    - **All the tasks can definitely be arranged in the above tree structure, but this analysis is flawed because it does not take potential loops among tasks into consideration.** 
    - E.g. the function ``create_nodes_for_next_level`` creates a Node only for the first parent under which a child shows up. Suppose the relations contain a loop: [A, B] and [B, A] coexist, i.e. A is both the parent and the child of B. If [A, B] shows up first in the list ``parents_children``, a Node will be created for [A, B], and the relation [B, A] will end up in ``repeated_children`` since no Node can be created for it.
    - So the next step analyses the potential loop structure based on ``repeated_children``.

### 1.1.3 Evidence of loop structure

In [12]:
# some tasks have the same subtasks: keep every [parent, child] pair whose
# child occurs more than once among the repeated children.
# Count each child name once up front instead of rebuilding a list and calling
# .count() for every pair (which was quadratic in the number of pairs).
child_counts = {}
for name in repeated_children_names:
    name = str(name)  # plain str keys, independent of the numpy string type
    child_counts[name] = child_counts.get(name, 0) + 1

same_children = np.array(
    [pair for pair in repeated_children if child_counts.get(pair[1], 0) > 1]
)
same_children

array([['Depth Estimation', '3D Depth Estimation'],
       ['Face Generation', 'Talking Face Generation'],
       ['Face Generation', 'Talking Head Generation'],
       ...,
       ['Text Generation', 'Multi-Document Summarization'],
       ['Text Generation', 'Text Style Transfer'],
       ['Text Generation', 'Table-to-Text Generation']], dtype='<U69')

In [13]:
# 'Keypoint Detection' -> '2D Human Pose Estimation' -> 'Pose Estimation' -> '3D Human Pose Estimation' ->
# -> '3D Absolute Human Pose Estimation' -> '3D Face Reconstruction' 
# -> 'Facial Recognition and Modelling' ->  'Face Generation' -> 'Talking Face Generation'

# rows of same_children whose child (second column) is 'Pose Estimation'
logic1 = np.array(same_children)[:,1] ==  'Pose Estimation' 
np.array(same_children)[logic1]

array([['2D Human Pose Estimation', 'Pose Estimation'],
       ['2D Human Pose Estimation', 'Pose Estimation']], dtype='<U69')

In [14]:
# rows of same_children whose child (second column) is '2D Human Pose Estimation'
logic2 = np.array(same_children)[:,1] ==  '2D Human Pose Estimation' 
np.array(same_children)[logic2]

array([['Keypoint Detection', '2D Human Pose Estimation'],
       ['Keypoint Detection', '2D Human Pose Estimation']], dtype='<U69')

In [15]:
# rows of same_children whose child (second column) is 'Keypoint Detection'
logic3 = np.array(same_children)[:,1] ==  'Keypoint Detection'
np.array(same_children)[logic3]

array([['Pose Estimation', 'Keypoint Detection'],
       ['Pose Estimation', 'Keypoint Detection']], dtype='<U69')

- **Conclusion:** 
    - By manually inspecting some tasks that share the same children/subtasks, one finds evidence that loop structures exist: e.g. 'Pose Estimation' has the parent '2D Human Pose Estimation', which has the parent 'Keypoint Detection', but 'Keypoint Detection' is in turn a child of 'Pose Estimation'.
    

### 1.2 Unique / Valid URL of Tasks ((sub)subtasks)

In [16]:
# Child name of every [parent, child] pair, merged with the first-level task
# names; going through a set drops names that occur more than once.
subtasks = [relation[1] for relation in parents_children]

tasks_all = list(set(subtasks + tasks))
print(f"There are {len(tasks_all)} tasks in total (subtasks included)")

def url_not_valid(prefix, surfix, timeout=10):
    """Return True when the URL built from ``prefix`` and ``surfix`` answers HTTP 404.

    The URL is ``prefix`` followed by ``surfix`` lower-cased with spaces
    replaced by hyphens.

    Parameters
    ----------
    prefix : str
        Leading part of the URL.
    surfix : str
        Name that is turned into the trailing part of the URL.
    timeout : float, optional
        Seconds to wait for the server. Without a timeout ``requests.get``
        can block indefinitely on an unresponsive connection.

    Returns
    -------
    bool
        True for status 404, False for any other status (the original
        returned None in that case, which is equally falsy).

    Raises
    ------
    requests.RequestException
        On connection problems or when the timeout expires.
    """
    slug = surfix.lower().replace(" ", "-")
    response = requests.get(prefix + slug, timeout=timeout)
    return response.status_code == 404

def url_test(tasks_all):
    """Return the names in ``tasks_all`` for which ``url_not_valid`` is truthy.

    Every name is checked against the paperswithcode task URL prefix, with a
    tqdm progress bar over the names; input order is preserved.
    """
    prefix = "https://paperswithcode.com/task/"
    return [name for name in tqdm(tasks_all) if url_not_valid(prefix, name)]

# One HTTP request per task name; the recorded run below took about 10 minutes for 2192 names.
tasks_with_no_valid_url = url_test(tasks_all)        

There are 2192 tasks in total (subtasks included)


100%|███████████████████████████████████████| 2192/2192 [09:49<00:00,  3.72it/s]


In [17]:
# number of task names whose derived URL returned 404
len(tasks_with_no_valid_url)

147

## 2. Category Structure

In [18]:
def get_all_relations(task_list):
    """Collect the categories of every parent/child task pair in a nested task list.

    Note: this redefines the ``get_all_relations`` from section 1.1, which
    returned name pairs only.

    For every entry of ``task_list`` the dicts found deeper inside its
    subtasks come first, followed by one dict per direct subtask of the form
    ``{parent name: parent categories, child name: child categories}``.
    A parent and child with the same name would collapse into a single key.
    """
    collected = []
    for parent in task_list:
        children = parent['subtasks']
        # Deeper generations first, then this parent's own children.
        collected.extend(get_all_relations(children))
        collected.extend(
            {parent['task']: parent["categories"], child['task']: child["categories"]}
            for child in children
        )
    return collected

# one dict per parent/child pair: {parent name: parent categories, child name: child categories}
parent_children_categories = get_all_relations(evaluation_tables)

In [19]:
# Classify every parent/child pair by how the child's categories relate to the
# parent's. Pairs in which either side has no categories are not classified.
categories_all = []   # every category mentioned in any pair (with repetitions)
same_cat = []         # child has exactly the parent's set of categories
reduced_cat = []      # child's categories are a proper subset of the parent's
diff_cat = []         # child has at least one category the parent lacks

for pair in parent_children_categories:
    cat_parent_child = list(pair.values())
    for categories in cat_parent_child:
        categories_all += categories

    # A pair whose parent and child share a name collapses into a single dict
    # entry; there is nothing to compare, and indexing [1] would fail.
    if len(cat_parent_child) != 2:
        continue
    cat_parent, cat_child = cat_parent_child

    if cat_parent != [] and cat_child != []:
        # Compare as sets: the order of the category lists carries no meaning,
        # so ['A', 'B'] and ['B', 'A'] must count as "same", not "reduced".
        parent_set = set(cat_parent)
        child_set = set(cat_child)
        if parent_set == child_set:
            same_cat.append(pair)

        elif child_set <= parent_set:
            reduced_cat.append(pair)

        else:
            diff_cat.append(pair)

In [20]:
# Summary: the distinct categories, then the size of each of the three groups
# (pairs where either side has no categories were not classified).
print(f"There are {len(set(categories_all))} different categories in total:")
print(set(categories_all))
print('-'*100)
print(f"Except those subtasks who do not have info about their categories, there are {len(same_cat)} subtasks that have exactly the same categories with their parents; {len(reduced_cat)} subtasks have a reduced set of their parents' categories; {len(diff_cat)} subtasks have different categories with parents (totally or partly)")

There are 16 different categories in total:
{'Knowledge Base', 'Playing Games', 'Audio', 'Methodology', 'Miscellaneous', 'Music', 'Time Series', 'Graphs', 'Reasoning', 'Natural Language Processing', 'Speech', 'Adversarial', 'Medical', 'Robots', 'Computer Code', 'Computer Vision'}
----------------------------------------------------------------------------------------------------
Except those subtasks who do not have info about their categories, there are 447 subtasks that have exactly the same categories with their parents; 58 subtasks have a reduced set of their parents' categories; 56 subtasks have different categories with parents (totally or partly)


In [21]:
# pairs in which the child's categories are all contained in the parent's, without being identical
reduced_cat

[{'Cancer': ['Medical', 'Knowledge Base'],
  'Breast Cancer Histology Image Classification': ['Knowledge Base']},
 {'Autonomous Driving': ['Computer Vision', 'Miscellaneous'],
  'Motion Forecasting': ['Computer Vision']},
 {'Domain Adaptation': ['Methodology', 'Computer Vision'],
  'Universal Domain Adaptation': ['Computer Vision']},
 {'Dialogue': ['Natural Language Processing', 'Speech'],
  'Dialogue Generation': ['Natural Language Processing']},
 {'Dialogue': ['Natural Language Processing', 'Speech'],
  'Dialogue Understanding': ['Natural Language Processing']},
 {'Medical Image Segmentation': ['Medical', 'Computer Vision'],
  '3D Medical Imaging Segmentation': ['Medical']},
 {'Medical Image Segmentation': ['Medical', 'Computer Vision'],
  'Pulmorary Vessel Segmentation': ['Computer Vision']},
 {'Anomaly Detection': ['Methodology', 'Computer Vision', 'Miscellaneous'],
  'Unsupervised Anomaly Detection': ['Miscellaneous']},
 {'Anomaly Detection': ['Methodology', 'Computer Vision', 'Mi

In [22]:
# pairs in which the child has at least one category that the parent does not have
diff_cat

[{'Stock Market Prediction': ['Time Series'],
  'Stock Prediction': ['Natural Language Processing']},
 {'Autonomous Vehicles': ['Computer Vision'],
  'Autonomous Driving': ['Computer Vision', 'Miscellaneous']},
 {'Autonomous Vehicles': ['Computer Vision'],
  'Autonomous Navigation': ['Computer Vision', 'Robots']},
 {'Speech Recognition': ['Speech'],
  'Visual Speech Recognition': ['Computer Vision']},
 {'Dialogue Understanding': ['Natural Language Processing'],
  'Spoken Language Understanding': ['Speech']},
 {'Dialogue': ['Natural Language Processing', 'Speech'],
  'Visual Dialog': ['Computer Vision']},
 {'Image Generation': ['Computer Vision'],
  'Text-to-Image Generation': ['Natural Language Processing']},
 {'Machine Translation': ['Natural Language Processing'],
  'Automatic Post-Editing': ['Computer Vision']},
 {'Machine Translation': ['Natural Language Processing'],
  'Multimodal Machine Translation': ['Natural Language Processing',
   'Computer Vision']},
 {'Image Generation': [

- **Conclusion:**
    - At first sight, there does not seem to be any structure among the categories: most subtasks take the same categories as their parents, and those subtasks whose categories differ from their parents' can have more categories or totally different ones. 