In [1]:
from datetime import datetime
import json
import numpy as np
import arxivpy
import networkx as nx
from bokeh.models import ColumnDataSource
from bokeh.models import Plot, Range1d, MultiLine, Circle, HoverTool, BoxZoomTool, ResetTool
from bokeh.models.graphs import from_networkx
from bokeh.palettes import Spectral4
from bokeh.plotting import figure
from bokeh.io import show
from bokeh.io import output_notebook, output_file
from bokeh.transform import linear_cmap
# Configure Bokeh so that later show() calls render inline in the notebook
output_notebook()

In [2]:
from litreview import fetch_recent_cv_papers, load_papers
from litreview import get_titles_to_papers, get_author_to_titles
from litreview import get_collaboration_graph
from litreview import extract_highest_weight_nodes, extract_attribute_match
from litreview import get_community_papers

In [3]:
# Inject a stylesheet that centres everything rendered inside `.output` elements
from IPython.display import display, HTML

centered_output_css = """
<style>
.output {
    display: flex;
    align-items: center;
    text-align: center;
}
</style>
"""
display(HTML(centered_output_css))

<h1 align="center">Journal Club</h1>

<center>
<table style="width:100%">
  <tr>
    <th><img src="journalclub_l.jpg" style="height: 300px;"/></th>
    <th><img src="journalclub_r.jpg" style="height: 300px;"/></th> 
  </tr>
</table>
</center>

<h1 align="center">Vision</h1>
<h2 align="center">Turning electromagnetic signals into understanding</h2>

* We **understand** if our interpretation of the signal is useful for achieving our goals.

<h1 align="center">Goal - photosynthesis</h1>

<center>
<img src="phacus.gif" style="height: 400px;"/>
</center>

<h1 align="center">Computer Vision</h1>
<h2 align="center">Turning electromagnetic signals into understanding automatically</h2>

* We **understand** if our interpretation of the signal is useful for achieving our goals.

<h1 align="center">Goal - navigation</h1>

<center>
<img src="cityscapes.png" style="height: 400px;"/>
<a href="https://www.cityscapes-dataset.com/">https://www.cityscapes-dataset.com/</a>
</center>

<h1 align="center">Goal - observing interesting events</h1>

<center>
<img src="crabnebula.jpg" style="height: 400px;"/>
<a href="https://arxiv.org/pdf/1902.00522.pdf">Deep Learning for Multi-Messenger Astrophysics</a>
</center>

<h1 align="center">Computer Vision Research</h1>

In [4]:
# Refreshing the local cache from arXiv is left disabled: the request rate allowed
# by the arXiv API is quite conservative, so a full fetch takes a long time.
# fetch_recent_cv_papers("papers.json")

# Read the cached papers, then derive the author -> titles and title -> paper
# lookups that the community cells pass to get_community_papers later on
papers = load_papers("papers.json", earliest_date="2012-09-30")
print("Number of papers = {}.".format(len(papers)))
author_to_titles = get_author_to_titles(papers)
titles_to_papers = get_titles_to_papers(papers)

# Build the collaboration graph over all loaded papers and report its overall size
collab_graph = get_collaboration_graph(papers, add_communities=True)
total_authors, total_collaborations = (
    collab_graph.number_of_nodes(),
    collab_graph.number_of_edges(),
)
for summary_template, summary_count in (
    ("Number of authors = {}.", total_authors),
    ("Number of collaborations = {}.", total_collaborations),
):
    print(summary_template.format(summary_count))

Number of papers = 14961.
14.91 s to compute communities.
Number of authors = 29995.
Number of collaborations = 153495.


* There is a huge amount of active computer vision research
* Since 2012-09-30 on arXiv:
  * 14961 papers
  * 29995 authors
  * 153495 collaborations

In [5]:
# Keep the rendering tractable by plotting only the highest-weight authors
# (2048 is the limit handed to the helper - presumably a node count; confirm in litreview)
collab_graph_render = extract_highest_weight_nodes(collab_graph, 2048)
n_authors, n_collaborations = (
    collab_graph_render.number_of_nodes(),
    collab_graph_render.number_of_edges(),
)

# Figure whose title reports rendered-vs-total authors and collaborations
title = "{}/{} authors and {}/{} collaborations".format(
    n_authors, total_authors, n_collaborations, total_collaborations
)
plot = figure(
    title=title,
    x_range=Range1d(-1.1, 1.1),
    y_range=Range1d(-1.1, 1.1),
)

# Lay the graph out with networkx's spring layout; node size and edge width are
# both driven by the "weight" attribute
graph_renderer = from_networkx(collab_graph_render, nx.spring_layout, scale=1, center=(0, 0))
node_glyph = Circle(size="weight", fill_color=Spectral4[0])
edge_glyph = MultiLine(line_color="black", line_alpha=0.50, line_width="weight")
graph_renderer.node_renderer.glyph = node_glyph
graph_renderer.edge_renderer.glyph = edge_glyph

# Hovering a node shows its name, weight and top-level community
node_hover_tool = HoverTool(
    tooltips=[
        ("name", "@index"),
        ("weight", "@weight"),
        ("community", "@comm_level_0"),
    ]
)
plot.add_tools(node_hover_tool)

# Attach the graph to the figure; the next cell displays it
plot.renderers.append(graph_renderer)

In [6]:
# Display the highest-weight collaboration graph assembled in the previous cell
show(plot)

<h1 align="center">Computer Vision Research Communities</h1>

* 2831 authors engaged in long-running research collaborations
* The topics of research don't exist in silos

In [7]:
# Rebuild the collaboration graph with minimum edge and node weights of 1.0,
# then report how many authors and collaborations remain
collab_graph = get_collaboration_graph(
    papers,
    min_edge_weight=1.0,
    min_node_weight=1.0,
    add_communities=True,
)
for count_template, count_getter in (
    ("{} total authors", collab_graph.number_of_nodes),
    ("{} total collaborations", collab_graph.number_of_edges),
):
    print(count_template.format(count_getter()))

0.13 s to compute communities.
2831 total authors
3987 total collaborations


In [8]:
# Size of the thresholded collaboration graph, reported in the figure title
n_authors, n_collaborations = (
    collab_graph.number_of_nodes(),
    collab_graph.number_of_edges(),
)

# Figure for the whole thresholded graph
title = "Tight collaborations with {} authors and {} total collaborations.".format(
    n_authors, n_collaborations
)
plot = figure(
    title=title,
    x_range=Range1d(-1.1, 1.1),
    y_range=Range1d(-1.1, 1.1),
)

# Spring layout; nodes are sized by "weight" and coloured by their
# "comm_level_0" attribute through a Category10 colour map over [0, 10]
graph_renderer = from_networkx(collab_graph, nx.spring_layout, scale=1, center=(0, 0))
community_fill = linear_cmap("comm_level_0", 'Category10_10', 0, 10)
graph_renderer.node_renderer.glyph = Circle(size="weight", fill_color=community_fill)
graph_renderer.edge_renderer.glyph = MultiLine(
    line_color="black",
    line_alpha=0.50,
    line_width="weight",
)

# Hovering a node shows its name, weight and top-level community
node_hover_tool = HoverTool(
    tooltips=[
        ("name", "@index"),
        ("weight", "@weight"),
        ("community", "@comm_level_0"),
    ]
)
plot.add_tools(node_hover_tool)

# Attach the graph to the figure; the next cell displays it
plot.renderers.append(graph_renderer)

In [9]:
# Display the community-coloured collaboration graph assembled in the previous cell
show(plot)

In [10]:
# Look at the top-level communities with ids 0-9: for each one, collect its papers
# and build a figure of its collaboration sub-graph.
#
# Every per-iteration name carries a ``comm_`` prefix on purpose. The loop used to
# rebind the notebook-level ``papers`` (loaded from papers.json and read again when
# the collaboration graph is rebuilt) and ``plot`` (shown by an earlier cell), so
# re-running those cells after this one silently operated on the last community's
# data instead.
comm_plots = []
comm_papers = []
for comm in range(10):
    comm_graph = extract_attribute_match(collab_graph, "comm_level_0", comm)
    comm_n_authors = comm_graph.number_of_nodes()
    comm_n_collaborations = comm_graph.number_of_edges()
    comm_paper_list = get_community_papers(comm_graph, author_to_titles, titles_to_papers)
    comm_papers.append(comm_paper_list)

    # Create plot (counts in the title describe the full community, while the
    # rendered graph is limited to its highest-weight nodes)
    comm_graph_render = extract_highest_weight_nodes(comm_graph, 2048)
    comm_title = "Community {} with {} authors, {} total collaborations and {} papers.".format(
        comm, comm_n_authors, comm_n_collaborations, len(comm_paper_list)
    )
    comm_plot = figure(
        title=comm_title,
        x_range=Range1d(-1.1, 1.1),
        y_range=Range1d(-1.1, 1.1),
        plot_width=800,
    )
    comm_renderer = from_networkx(comm_graph_render, nx.spring_layout, scale=1, center=(0, 0))
    comm_renderer.node_renderer.glyph = Circle(
        size="weight",
        fill_color=linear_cmap("comm_level_0", 'Category10_10', 0, 10),
    )
    comm_renderer.edge_renderer.glyph = MultiLine(line_color="black", line_alpha=0.50, line_width="weight")

    # Interactivity
    comm_hover_tool = HoverTool(tooltips=[("name", "@index"), ("weight", "@weight")])
    comm_plot.add_tools(comm_hover_tool)

    # Keep the finished figure; later cells show individual entries of comm_plots
    comm_plot.renderers.append(comm_renderer)
    comm_plots.append(comm_plot)

In [11]:
# Display the figure built for community 0
show(comm_plots[0])

In [12]:
# One "<first 10 characters of publish_date> <title>" string per paper collected for community 0
[" ".join((paper['publish_date'][:10], paper['title'])) for paper in comm_papers[0]]

['2019-03-29 Deep Plug-and-Play Super-Resolution for Arbitrary Blur Kernels',
 '2019-03-28 FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation',
 '2019-03-27 Network Slimming by Slimmable Networks: Towards One-Shot Architecture Search for Channel Numbers',
 '2019-03-25 Video Relationship Reasoning using Gated Spatio-Temporal Energy Graph',
 '2019-03-25 Manifold Criterion Guided Transfer Learning via Intermediate Domain Generation',
 '2019-03-25 Knowledge-driven Encode, Retrieve, Paraphrase for Medical Image Report Generation',
 '2019-03-24 Pixel-aware Deep Function-mixture Network for Spectral Super-Resolution',
 '2019-03-22 Few-shot Adaptive Faster R-CNN',
 '2019-03-22 Joint 3D Face Reconstruction and Dense Face Alignment from A Single Image with 2D-Assisted Self-Supervised Learning',
 '2019-03-21 Weakly-Supervised Discovery of Geometry-Aware Representation for 3D Human Pose Estimation',
 '2019-03-18 Learning Correspondence from the Cycle-Consistency of T

In [13]:
# Display the figure built for community 1
show(comm_plots[1])

In [14]:
# One "<first 10 characters of publish_date> <title>" string per paper collected for community 1
[" ".join((paper['publish_date'][:10], paper['title'])) for paper in comm_papers[1]]

['2019-03-28 Attention-Guided Generative Adversarial Networks for Unsupervised Image-to-Image Translation',
 '2019-03-28 High Fidelity Face Manipulation with Extreme Pose and Expression',
 '2019-03-28 Feature Intertwiner for Object Detection',
 '2019-03-27 Accurate Monocular 3D Object Detection via Color-Embedded 3D Reconstruction for Autonomous Driving',
 '2019-03-27 Self-Supervised Learning via Conditional Motion Propagation',
 '2019-03-27 Dense Intrinsic Appearance Flow for Human Pose Transfer',
 '2019-03-26 GS3D: An Efficient 3D Object Detection Framework for Autonomous Driving',
 '2019-03-25 Dual Variational Generation for Low-Shot Heterogeneous Face Recognition',
 '2019-03-23 An End-to-end Framework For Integrated Pulmonary Nodule Detection and False Positive Reduction',
 '2019-03-23 Automatic Pulmonary Lobe Segmentation Using Deep Learning',
 '2019-03-23 Automated pulmonary nodule detection using 3D deep convolutional neural networks',
 '2019-03-23 Feedback Network for Image Sup

In [15]:
# Display the figure built for community 2
show(comm_plots[2])

In [16]:
# One "<first 10 characters of publish_date> <title>" string per paper collected for community 2
[" ".join((paper['publish_date'][:10], paper['title'])) for paper in comm_papers[2]]

['2019-03-28 Revisiting Local Descriptor based Image-to-Class Measure for Few-shot Learning',
 '2019-03-27 Scaling up the randomized gradient-free adversarial attack reveals overestimation of robustness using established attacks',
 '2019-03-26 Large-scale interactive object segmentation with human annotators',
 '2019-03-25 f-VAEGAN-D2: A Feature Generating Framework for Any-Shot Learning',
 '2019-03-04 The StreetLearn Environment and Dataset',
 '2019-03-01 Learning To Follow Directions in Street View',
 '2019-02-15 Cycle-Consistency for Robust Visual Question Answering',
 '2019-02-14 Learning to Control Self-Assembling Morphologies: A Study of Generalization via Modularity',
 '2019-01-29 PA-GAN: Improving GAN Training by Progressive Augmentation',
 '2019-01-11 DMC-Net: Generating Discriminative Motion Cues for Fast Compressed Video Action Recognition',
 '2018-12-19 MID-Fusion: Octree-based Object-Level Multi-Instance Dynamic SLAM',
 '2018-12-17 Not Using the Car to See the Sidewalk: Qu

<h1 align="center">Scene understanding</h1> 

* The aim is to automate the understanding of a still image.
* Primarily answers "**what**" and "**where**" questions.

<h1 align="center">Scene understanding - classification</h1> 

* **What** things are in this image?

<center>
<img src="imagenet_classification_examples.png" style="height: 350px;"/>
</center>

<h1 align="center">Scene understanding - classification</h1> 

* Prominent open dataset: [ImageNet](http://image-net.org/challenges/LSVRC/2017/index#loc), 1.2M images, 1000 categories
* Metric: top-5 error

In [17]:
# Load state of the art data
with open("sotas.json") as f:
    sotas = json.loads(f.read())

sotas_columnar = {}
for challenge, winners in sotas.items():
    winners_columnar = {key: [] for key in winners[0].keys()}
    for entry in winners:
        for key, val in entry.items():
            if key == "date":
                val = datetime.strptime(val, "%Y-%m-%d")
            winners_columnar[key].append(val)
    sotas_columnar[challenge] = winners_columnar

In [18]:
# ImageNet classification winners
imagenet_classification_winners_source = ColumnDataSource(data=sotas_columnar["classification"])
p_imagenet_classification = figure(title="ImageNet classification winners", x_axis_type="datetime", y_range=(0.0, 0.3), plot_width=800, plot_height=600)
p_imagenet_classification.line("date", "error", color="black", line_width=4, source=imagenet_classification_winners_source)
p_imagenet_classification.circle("date", "error", color="black", size=20, source=imagenet_classification_winners_source)
hover_tool = HoverTool(tooltips=[("error", "@error"), ("team_name", "@team_name")])
p_imagenet_classification.add_tools(hover_tool)
p_imagenet_classification.xaxis.axis_label = "Date"
p_imagenet_classification.yaxis.axis_label = "Error"
p_imagenet_classification.xaxis.axis_label_text_font_size = "16pt"
p_imagenet_classification.xaxis.major_label_text_font_size = "16pt"
p_imagenet_classification.yaxis.axis_label_text_font_size = "16pt"
p_imagenet_classification.yaxis.major_label_text_font_size = "16pt"

In [19]:
# Display the ImageNet classification winners figure built in the previous cell
show(p_imagenet_classification)

<h1 align="center">Scene understanding - classification</h1> 

* SOTA: [Dual Path Networks](https://arxiv.org/pdf/1707.01629.pdf)
    * Balances feature discovery and feature re-use
* Challenges:
   * Object occlusion
   * Over-reliance on context

<h1 align="center">Scene understanding - detection</h1> 

* **What** things are in this image?
* **Where** are the extents of the things in this image?

<center>
<table style="width:100%">
  <tr>
    <th><img src="imagenet_detection_example_1.png" style="height: 300px;"/></th>
    <th><img src="imagenet_detection_example_2.png" style="height: 300px;"/></th> 
  </tr>
</table>
</center>

<h1 align="center">Scene understanding - detection</h1> 

* Prominent open dataset: [ImageNet](http://image-net.org/challenges/LSVRC/2017/index#det), 457k images, 200 categories
* Metric: Mean average precision (AP) at intersection over union (IoU) > 0.5

<center>
<img src="iou_examples.png" style="height: 300px;"/>
</center>

In [20]:
# ImageNet detection winners
imagenet_detection_winners_source = ColumnDataSource(data=sotas_columnar["detection"])
p_imagenet_detection = figure(title="ImageNet detection winners", x_axis_type="datetime", y_range=(0.0, 1.0), plot_width=800, plot_height=600)
p_imagenet_detection.line("date", "mean_ap", color="black", line_width=4, source=imagenet_detection_winners_source)
p_imagenet_detection.circle("date", "mean_ap", color="black", size=20, source=imagenet_detection_winners_source)
hover_tool = HoverTool(tooltips=[("mean_ap", "@mean_ap"), ("team_name", "@team_name")])
p_imagenet_detection.add_tools(hover_tool)
p_imagenet_detection.xaxis.axis_label = "Date"
p_imagenet_detection.yaxis.axis_label = "Mean Average Precision"
p_imagenet_detection.xaxis.axis_label_text_font_size = "16pt"
p_imagenet_detection.xaxis.major_label_text_font_size = "16pt"
p_imagenet_detection.yaxis.axis_label_text_font_size = "16pt"
p_imagenet_detection.yaxis.major_label_text_font_size = "16pt"

In [21]:
# Display the ImageNet detection winners figure built in the previous cell
show(p_imagenet_detection)

<h1 align="center">Scene understanding - detection</h1>

<center>
<img src="fruits.png" style="height: 450px;"/>
</center>

<h1 align="center">Scene understanding - segmentation</h1> 

* **Semantic**: for each pixel, **what** kind of thing is it a part of?
* **Instance**: in addition, **which** particular thing does the pixel belong to?

<center>
<img src="coco_detection_example_1.png"/>
</center>

<h1 align="center">Scene understanding - segmentation</h1> 

* Prominent open dataset: [COCO Object Detection](http://cocodataset.org/#detection-2018), 200k images, 80 categories
* Metric: mean AP for IoU = (0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95)

In [22]:
# COCO instance segmentation winners
coco_instance_segmentation_winners_source = ColumnDataSource(data=sotas_columnar["instance_segmentation"])
p_coco_instance_segmentation = figure(title="COCO instance segmentation winners", x_axis_type="datetime", y_range=(0.0, 1.0), plot_width=800, plot_height=600)
p_coco_instance_segmentation.line("date", "ap_50", color="black", line_width=4, source=coco_instance_segmentation_winners_source, legend="AP50")
p_coco_instance_segmentation.circle("date", "ap_50", color="black", size=20, source=coco_instance_segmentation_winners_source)
p_coco_instance_segmentation.line("date", "ap_75", color="blue", line_width=4, source=coco_instance_segmentation_winners_source, legend="AP70")
p_coco_instance_segmentation.circle("date", "ap_75", color="blue", size=20, source=coco_instance_segmentation_winners_source)
hover_tool = HoverTool(tooltips=[("ap_50", "@ap_50"), ("ap_75", "@ap_75"), ("team_name", "@team_name")])
p_coco_instance_segmentation.add_tools(hover_tool)
p_coco_instance_segmentation.xaxis.axis_label = "Date"
p_coco_instance_segmentation.yaxis.axis_label = "Average Precision"
p_coco_instance_segmentation.xaxis.axis_label_text_font_size = "16pt"
p_coco_instance_segmentation.xaxis.major_label_text_font_size = "16pt"
p_coco_instance_segmentation.yaxis.axis_label_text_font_size = "16pt"
p_coco_instance_segmentation.yaxis.major_label_text_font_size = "16pt"

In [23]:
# Display the COCO instance segmentation winners figure built in the previous cell
show(p_coco_instance_segmentation)

<h1 align="center">Scene understanding - segmentation</h1> 

* SOTA - [Cascade R-CNN](https://arxiv.org/abs/1712.00726) + [FishNet](https://arxiv.org/abs/1901.03495)
    * Improved communication between branches: bounding box regression, mask prediction, semantic segmentation
* Challenges
    * Unclear notions of an individual object
    * Discontinuities in object mask

<h1 align="center">Scene understanding - segmentation</h1> 

<center>
<img src="treeseg.png" style="height: 450px;"/>
</center>

<h1 align="center">Scene understanding - even more detail</h1> 

* [Panoptic segmentation](http://cocodataset.org/#panoptic-2018): instance segmentation, plus segmentation of non-objects (stuff)
* [Depth estimation](http://www.cvlibs.net/datasets/kitti/eval_depth.php?benchmark=depth_prediction): how far away is each pixel from the camera?
* [Keypoint detection](http://cocodataset.org/#keypoints-2018): where are the parts of things in this image?
* [Pose estimation](http://openaccess.thecvf.com/content_ECCV_2018/papers/Timo_von_Marcard_Recovering_Accurate_3D_ECCV_2018_paper.pdf): what are the orientations of the parts of things in this image?

<h1 align="center">Scene understanding - even more detail</h1> 

<center>
<img src="keypoints.png" style="height: 400px;"/>
</center>

<h1 align="center">Image registration / optical flow</h1>

* These two images are of the same area, possibly with different sensors.
* For each pixel in image A, **where** does it belong in image B?

<center>
<img src="proposal_flow_example.jpg" style="height: 300px;"/>
</center>

<h1 align="center">Image registration / optical flow</h1>

* Prominent open dataset: [Proposal Flow](https://www.di.ens.fr/willow/research/proposalflow/)
* SOTA - [RoccoFlow](https://arxiv.org/abs/1703.05593)

<h1 align="center">Image registration / optical flow</h1>
<center>
<table style="width:100%">
  <tr>
    <th><img src="registration_a.png" style="height: 400px;"/></th>
    <th><img src="registration_b.png" style="height: 400px;"/></th> 
  </tr>
</table>
</center>

<h1 align="center">Image registration / optical flow</h1>
<center>
<table style="width:100%">
  <tr>
    <th><img src="registration_a.png" style="height: 400px;"/></th>
    <th><img src="registration_b_star.png" style="height: 400px;"/></th> 
  </tr>
</table>
</center>

<h1 align="center">Other problems</h1> 

* [Image search](https://hal.inria.fr/inria-00316866/document): given an image, can we find similar images?
* [Image generation](https://arxiv.org/pdf/1502.04623.pdf): given a prompt, can we make an image that matches it?
* [Video captioning](http://openaccess.thecvf.com/content_cvpr_2016/papers/Yu_Video_Paragraph_Captioning_CVPR_2016_paper.pdf): can we say what's happening at a given moment?
* [Visual question answering](http://openaccess.thecvf.com/content_iccv_2015/papers/Antol_VQA_Visual_Question_ICCV_2015_paper.pdf): can we answer queries about an image?

<h1 align="center">Other problems</h1> 

<center>
<img src="pizza.png" style="height: 300px;"/>
</center>

<h1 align="center">General challenges - effort</h1> 

* Weak supervision
* Partial supervision

<h1 align="center">General challenges - effort</h1> 

<center>
<table style="width:100%">
  <tr>
    <th><img src="khoreva_01.png" style="height: 400px;"/></th>
    <th><img src="khoreva_04.png" style="height: 400px;"/></th> 
  </tr>
</table>
</center>

<h1 align="center">General challenges - robustness</h1> 

* Adversarial examples
* Bias
* Privacy
* Interpretability
* Safety

<h1 align="center">General challenges - robustness</h1> 

<center>
<img src="adversarial.jpg" style="height: 300px;"/>
</center>

<h1 align="center">Let's keep learning!</h1>

* Next up:
    * 8th of May: Advances in Image Classification, Munsanje Mweene
    * You?