In [1]:
# Enable IPython's autoreload extension. Mode 2 re-imports all loaded modules
# before each cell executes, so edits to the local demo module are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from asqp_rl_demo import *

# Explanation

ASQP-RL is a novel algorithm that curates subsets of large tabular datasets using Reinforcement Learning. The following is a user study meant to test the quality of the system with user feedback.
To test the system, you will be presented with a few questions. Each question consists of an SQL query and two candidate dataframes: one is the answer obtained by running the query against the database, and the other is the answer extracted from the system. We ask that you try to identify which is which.

# Questions

In [3]:
# Single AsqpInstance shared by every question cell below: each cell submits
# its query through this object and the final cell saves answers from it.
# NOTE(review): name='user_study' presumably labels this study session —
# confirm against asqp_rl_demo.AsqpInstance.
asqp_rl = AsqpInstance(name='user_study')

### Question 1

In [4]:
# Question 1: title, year and organization name of publications in the
# 'Databases' domain that are linked to one of three universities
# (Tel Aviv University, University of Maryland, University of Michigan).
# The query has no DISTINCT/GROUP BY, so duplicate rows are possible.
sql = '''
SELECT title,
       YEAR,
       o.name AS organization
FROM organization o,
     publication p,
     publication_organization po,
     domain_publication dp,
     DOMAIN d
WHERE p.pid = po.pid
  AND o.oid = po.oid
  AND dp.pid = p.pid
  AND dp.did = d.did
  AND d.name = 'Databases'
  AND o.name IN ('Tel Aviv University', 'University of Maryland', 'University of Michigan');
'''
# Hand the query to the shared instance, then render the question.
# The captured output shows two result tables followed by a radio-button
# choice ('Left: ASQP-RL, Right: DB' / 'Left: DB, Right: ASQP-RL') and a
# 'Finish' button.
# NOTE(review): which side shows which source appears to be decided inside
# asqp_rl_demo — confirm there.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

title,year,organization
Crowd-Based Data Sourcing - (Abstract).,2011,Tel Aviv University
Making Collective Wisdom Wiser.,2013,Tel Aviv University
Circuits for Datalog Provenance.,2014,Tel Aviv University
Deriving probabilistic databases with inference ensembles.,2011,Tel Aviv University
Breaking out of the MisMatch trap.,2014,University of Michigan
Sample-driven schema mapping.,2012,University of Michigan
A sample-and-clean framework for fast and accurate query processing on dirty data.,2014,University of California Berkeley
On provenance minimization.,2011,Tel Aviv University
Making interval-based clustering rank-aware.,2011,Tel Aviv University
Crowd mining.,2013,Tel Aviv University

title,year,organization
An Image Retrieval Method Based on Information Filtering of User Relevance Feedback Records.,2003,University of Maryland
Dynamic XML documents with distribution and replication.,2003,Tel Aviv University
Data Intensive Production Systems: The DIPS Approach.,1989,University of Maryland
Better Algorithms and Bounds for Directed Maximum Leaf Problems.,2007,Tel Aviv University
Fast Failure Recovery in Distributed Graph Processing Systems.,2014,University of Michigan
Multilingual Topic Models for Unaligned Text.,2012,University of Maryland
Optimal Fillings - A new spatial subdivision problem related to packing and covering.,2012,University of Michigan
Intensional semantics for RDF data structures.,2008,University of Maryland
MKL-RT: Multiple Kernel Learning for Ratio-trace Problems via Convex Optimization.,2014,University of Maryland
Front Matter.,2014,University of Michigan


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

### Question 2

In [5]:
# Question 2: name, paper count, citation count and organization of authors
# with fewer than 100 papers and more than 1000 citations who wrote a
# publication after 2013 that is linked to 'University of California San Diego'.
# The organization comes from the publication (publication_organization),
# not from the author row. There is no DISTINCT/GROUP BY, so an author can
# appear once per matching publication.
sql = '''
SELECT a.name,
       a.paper_count,
       a.citation_count,
       o.name AS "organization"
FROM author a,
     writes w,
     publication p,
     organization o,
     publication_organization po
WHERE a.aid = w.aid
  AND w.pid = p.pid
  AND p.pid = po.pid
  AND o.oid = po.oid
  AND a.paper_count < 100
  AND a.citation_count > 1000
  AND o.name = 'University of California San Diego'
  AND p.year > 2013;
'''
# Hand the query to the shared instance, then render the question.
# The captured output shows two result tables, a radio-button choice between
# the two left/right orderings of ASQP-RL and DB, and a 'Finish' button.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

name,paper_count,citation_count,organization
Kristofer S. J. Pister,46,3328,University of California San Diego
Srilatha Manne,23,1285,University of California San Diego
Frederic T. Chong,83,1037,University of California San Diego
Brighten Godfrey,64,1084,University of California San Diego
Fei Sha,77,1408,University of California San Diego
Yannis Papakonstantinou,88,7175,University of California San Diego
Jeffrey C. Mogul,90,4747,University of California San Diego
Kristen Grauman,97,1535,University of California San Diego
John Kubiatowicz,82,6934,University of California San Diego
Charles Elkan,85,3836,University of California San Diego

name,paper_count,citation_count,organization
Ranjit Jhala,71,2930,University of California San Diego
John Kubiatowicz,82,6934,University of California San Diego
James S. Plank,78,2858,University of California San Diego
Javier R. Movellan,82,1615,University of California San Diego
Jeffrey C. Mogul,90,4747,University of California San Diego
Stephen W. Keckler,89,3940,University of California San Diego
Scott Hauck,94,2977,University of California San Diego
Srilatha Manne,23,1285,University of California San Diego
Xiaolan Zhang,52,1571,University of California San Diego
Carla E. Brodley,75,2866,University of California San Diego


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

### Question 3

In [6]:
# Question 3: names of domains that have at least one conference and are
# tagged with the keyword 'Machine Learning'. GROUP BY domain.name collapses
# the join result to one row per domain name.
sql = '''
SELECT domain.name
FROM conference,
     domain_conference,
     DOMAIN,
     domain_keyword,
     keyword
WHERE conference.cid = domain_conference.cid
  AND domain_conference.did = domain.did
  AND domain.did = domain_keyword.did
  AND domain_keyword.kid = keyword.kid
  AND keyword.keyword = 'Machine Learning'
GROUP BY domain.name;
'''
# Hand the query to the shared instance, then render the question.
# The captured output shows two result tables, a radio-button choice between
# the two left/right orderings of ASQP-RL and DB, and a 'Finish' button.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

name
Machine Learning & Pattern Recognition
Artificial Intelligence

name
Artificial Intelligence
Machine Learning & Pattern Recognition


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

### Question 4

In [7]:
# Question 4: names of conferences that belong to at least one domain and
# have a publication with citation_count > 1 written by an author whose
# organization exists in the organization table. GROUP BY conference.name
# collapses the join result to one row per conference name.
sql = '''
SELECT conference.name
FROM author,
     organization,
     writes,
     publication,
     conference,
     domain_conference,
     domain
WHERE author.oid = organization.oid
  AND author.aid = writes.aid
  AND writes.pid = publication.pid
  AND publication.cid = conference.cid
  AND conference.cid = domain_conference.cid
  AND domain_conference.did = domain.did
  AND publication.citation_count > 1
GROUP BY conference.name;
'''
# Hand the query to the shared instance, then render the question.
# The captured output shows two result tables, a radio-button choice between
# the two left/right orderings of ASQP-RL and DB, and a 'Finish' button.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

name
IEEECIT
IFIP1-7
ITNG
MSR
SEE
PADS
KRDB
GIR
ANTALG
NIR

name
AAAI
FGR
EDS
WEBDB
ECCB
TES
RECOMB
CRYPTO
EMNLP
ECCV


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

In [8]:
# Final step of the study: the captured output shows this call rendering a
# 'Finish' button.
# NOTE(review): presumably clicking it persists the participant's answers for
# all questions — confirm in asqp_rl_demo.AsqpInstance.save_answers_button.
asqp_rl.save_answers_button()

Button(description='Finish', style=ButtonStyle())

Output()