In [1]:
import ast
import time

import numpy as np
import pandas as pd
import requests
import scattertext as st
import spacy
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource, ranges, LabelSet
from bokeh.palettes import PuBu
from bokeh.plotting import figure
from IPython.core.display import display, HTML
from IPython.display import IFrame
# Configure Bokeh so that figures passed to show() are rendered in the
# notebook's output cells.
output_notebook()
# Inject a CSS rule that widens the notebook's `.container` element to 98%
# of the browser window.
display(HTML("<style>.container { width:98% !important; }</style>"))
# IPython magic: draw matplotlib figures inline in cell output.
%matplotlib inline

In [11]:
# Download the ICLR 2018 blind-submission notes from OpenReview into `df`
# (one row per entry of the response's 'notes' list).
# NOTE(review): the query asks for a single page (offset=0, limit=1000); if the
# venue has more submissions than that, the remainder is not fetched -- confirm.
url = 'https://openreview.net/notes?invitation=ICLR.cc%2F2018%2FConference%2F-%2FBlind_Submission&offset=0&limit=1000'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of trying to parse an error page as data.
response.raise_for_status()
df = pd.DataFrame(response.json()['notes'])

In [154]:
# Download the full note list of every submission's forum (one HTTP request per
# row of `df`), store it in df['forumContent'] and cache the frame on disk.
forum_content = []
for i, forum_id in enumerate(df.forum):
    notes_url = 'https://openreview.net/notes?forum={}&trash=true'.format(forum_id)
    try:
        response = requests.get(notes_url, timeout=60)
        response.raise_for_status()
        forum_content.append(response.json())
    except (requests.RequestException, ValueError) as exc:
        # Network/HTTP failure or a body that is not valid JSON.
        print('err', i, forum_id, exc)
        # Keep exactly one entry per forum so the list stays aligned with df's
        # rows.  (Rebinding `forum_content` to a dict here, as the code used to,
        # discarded everything collected so far and made every later append
        # fail.)  The placeholder mirrors the {'notes': [...]} shape that the
        # rest of the notebook reads.
        forum_content.append({'notes': []})
    # Pause between requests to avoid hammering the server.
    time.sleep(.3)
# Align explicitly on df's index rather than relying on it being 0..n-1.
df['forumContent'] = pd.Series(forum_content, index=df.index)
df.to_csv('iclr2018reviews.csv.bz2', index=False, compression='bz2')

In [2]:
#
# Code here is to read locally:
# Reload the scraped data from the bz2-compressed CSV cache instead of using
# the frame built by the download cells.
read_local = True
if read_local:
    df = pd.read_csv('iclr2018reviews.csv.bz2')
    # The CSV round-trip turns the nested dict/list columns into their Python
    # repr strings.  ast.literal_eval parses literals only (dicts, lists,
    # strings, numbers, True/False/None), so -- unlike eval -- a tampered cache
    # file cannot execute arbitrary code.
    df['forumContent'] = df.forumContent.apply(ast.literal_eval)
    df['content'] = df.content.apply(ast.literal_eval)

In [3]:
def _first_decision(forum):
    """Return the 'decision' value of the first note in `forum` that has one.

    Every note in forum['notes'] is scanned; indexing the collected list with
    [0] raises IndexError when no note carries a decision.
    """
    found = []
    for note in forum['notes']:
        note_content = note['content']
        if 'decision' in note_content:
            found.append(note_content['decision'])
    return found[0]


# Attach each paper's raw decision label, then show how often each one occurs.
df['decision_raw'] = df.forumContent.apply(_first_decision)
df['decision_raw'].value_counts()

Reject                      504
Accept (Poster)             313
Invite to Workshop Track     90
Accept (Oral)                23
Name: decision_raw, dtype: int64

In [4]:
# Number of rows (submissions) in the frame.
df.shape[0]

930

In [6]:
def _decision_group(decision_raw):
    """Collapse a raw decision label into 'Reject', 'Accept' or 'Workshop'.

    'Reject' maps to itself, any label starting with 'Accept' maps to
    'Accept', and everything else is treated as 'Workshop'.
    """
    if decision_raw == 'Reject':
        return 'Reject'
    if decision_raw.startswith('Accept'):
        return 'Accept'
    return 'Workshop'


def _rating_bin(rating):
    """Bin a rating label by the integer that precedes its first ':'.

    Scores below 5 are 'Negative', above 6 are 'Positive', and 5 or 6 are
    'Neutral'.  Raises ValueError if the leading text is not an integer.
    """
    score = int(rating.split(':')[0].strip())
    if score < 5:
        return 'Negative'
    if score > 6:
        return 'Positive'
    return 'Neutral'


# Lift the title and author list out of each submission's content dict.
df['title'] = df.content.apply(lambda x: x['title'])
df['authors'] = df.content.apply(lambda x: x['authors'])

# One record per review note across all forums.  Building a single list and a
# single DataFrame replaces the earlier "one tiny DataFrame per forum, then
# concat" approach; passing `columns` keeps the expected schema even when no
# forum has any review, so the merge below cannot fail on a missing 'forum'
# column.
review_columns = ['review', 'rating', 'confidence', 'forum']
review_records = [
    {'review': n['content']['review'],
     'rating': n['content']['rating'],
     'confidence': n['content']['confidence'],
     'forum': n['forum']}
    for c in df.forumContent
    for n in c['notes']
    if 'content' in n and 'review' in n['content']
]
only_reviews_df = pd.DataFrame(review_records, columns=review_columns)

# Attach paper-level fields to every review (inner join on the forum id).
reviews_df = pd.merge(df[['title', 'authors', 'decision_raw', 'forum']], only_reviews_df, on='forum')
reviews_df['decision'] = reviews_df['decision_raw'].apply(_decision_group)
reviews_df['rating_bin'] = reviews_df['rating'].apply(_rating_bin)
# Combined label, e.g. 'Accept, Positive'.
reviews_df['category'] = reviews_df['decision'] + ', ' + reviews_df['rating_bin']

In [7]:
# Bar chart of paper counts per raw decision.
# reviews_df has one row per review, so de-duplicate on (forum, decision_raw)
# first to count each paper once.
decisions = reviews_df[['forum','decision_raw']].drop_duplicates()['decision_raw'].value_counts()
source = ColumnDataSource(dict(x=list(decisions.index),y=decisions.values))

# The y-axis upper bound is derived from the data (tallest bar plus headroom
# for the count label drawn above it) instead of a fixed 600, so the chart
# cannot clip if the counts change; this matches the ratings chart.
plot = figure(plot_width=600, plot_height=300, tools="save",
        x_axis_label = "Decision",
        y_axis_label = "Paper Count",
        title="",
        x_minor_ticks=2,
        x_range = source.data["x"],
        y_range= ranges.Range1d(start=0,end=int(decisions.max()) + 100))


# Print each bar's count at the top of the bar.
labels = LabelSet(x='x', y='y', text='y', level='glyph',
        x_offset=-13.5, y_offset=0, source=source, render_mode='canvas')

plot.vbar(source=source,x='x',top='y',bottom=0,width=0.3,color=PuBu[7][2])

plot.add_layout(labels)
show(plot)

In [8]:
# Bar chart of review counts per rating.
# Count reviews per rating label, re-key the counts by the integer found
# before the label's first ':', and order the bars by that integer.
ratings = reviews_df['rating'].value_counts()
ratings.index = [int(label.split(':')[0]) for label in ratings.index]
ratings = ratings.sort_index()

# The x values are stringified; the same list is reused below as the x_range.
source = ColumnDataSource(dict(
    x=list(map(str, ratings.index)),
    y=ratings.values,
))

# Leave 100 units of headroom above the tallest bar.
plot = figure(
    plot_width=600,
    plot_height=300,
    tools="save",
    x_axis_label="Rating",
    y_axis_label="Review Count",
    title="",
    x_minor_ticks=2,
    x_range=source.data["x"],
    y_range=ranges.Range1d(start=0, end=ratings.max() + 100),
)

# Bars first, then a label with each bar's count at its top.
plot.vbar(source=source, x='x', top='y', bottom=0, width=0.3, color=PuBu[7][2])

labels = LabelSet(
    x='x', y='y', text='y',
    level='glyph',
    x_offset=-13.5, y_offset=0,
    source=source,
    render_mode='canvas',
)
plot.add_layout(labels)

show(plot)

  elif np.issubdtype(type(obj), np.float):


In [172]:
# Write the per-review table to a bz2-compressed CSV, omitting the row index.
reviews_df.to_csv(
    'iclr2018_reviews.csv.bz2',
    compression='bz2',
    index=False,
)