In [1]:
# Enable IPython's autoreload extension. Mode 2 re-imports modules before each
# cell executes, so edits to imported project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from asqp_rl_demo import *

# Explanation

ASQP-RL is a novel algorithm that curates subsets of large tabular datasets using Reinforcement Learning. The following is a user study meant to evaluate the quality of the system through user feedback.
To test the system, you will be presented with a few questions. Each question consists of an SQL query and two candidate dataframes: one is the answer obtained by running the query on the database, and the other is extracted from the system. We ask that you try to identify which is which.

# Questions

In [3]:
# Single AsqpInstance (named 'user_study') reused by every question cell below
# and by the final save-answers cell.
asqp_rl = AsqpInstance(name='user_study')

### Question 1

In [4]:
# Question 1 query: title, year and organization name of publications in the
# 'Databases' domain that are linked to one of the three listed universities.
sql = '''
SELECT title,
       YEAR,
       o.name AS organization
FROM organization o,
     publication p,
     publication_organization po,
     domain_publication dp,
     DOMAIN d
WHERE p.pid = po.pid
  AND o.oid = po.oid
  AND dp.pid = p.pid
  AND dp.did = d.did
  AND d.name = 'Databases'
  AND o.name IN ('Tel Aviv University', 'University of Maryland', 'University of Michigan');
'''
# Hand the query to the shared instance, then show the question to the participant.
# NOTE(review): demonstrate_asqp_rl presumably renders the two candidate tables and
# the answer widgets seen in this cell's output — confirm in asqp_rl_demo.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

title,year,organization
Mob data sourcing.,2012,Tel Aviv University
Making Collective Wisdom Wiser.,2013,Tel Aviv University
Revision provenance in text documents of asynchronous collaboration.,2013,University of Michigan
Skimmer: rapid scrolling of relational query results.,2012,University of Michigan
Mining the Crowd.,2014,Tel Aviv University
DSH: data sensitive hashing for high-dimensional k-nnsearch.,2014,University of Michigan
A sample-and-clean framework for fast and accurate query processing on dirty data.,2014,University of California Berkeley
Object Semantics for XML Keyword Search.,2014,University of Michigan
Circuits for Datalog Provenance.,2014,Tel Aviv University
Provenance views for module privacy.,2011,Tel Aviv University

title,year,organization
Coding for Parallel Channels: Gallager Bounds for Binary Linear Codes with Applications to Repeat-Accumulate Codes and Variations.,2006,Tel Aviv University
A Research Status Report on Adaptation for Mobile Data Access.,1995,University of Michigan
On Element SDD Approximability.,2009,Tel Aviv University
Threshold Functions in Random s-Intersection Graphs.,2015,University of Maryland
Fast Partial Distance Estimation and Applications.,2014,Tel Aviv University
A Classification of Information Systems: Analysis and Interpretation.,1993,Tel Aviv University
Spectral Graph Cut from a Filtering Point of View.,2012,University of Maryland
A Population Analysis for Hierarchical Data Structures.,1987,University of Maryland
Lossy Compression via Sparse Linear Regression: Computationally Efficient Encoding and Decoding.,2012,University of Michigan
Protocols for Learning Classifiers on Distributed Data.,2012,University of Maryland


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

### Question 2

In [5]:
# Question 2 query: name, paper count, citation count and organization of authors
# with fewer than 100 papers and more than 1000 citations who wrote a publication
# after 2013 that is linked to 'University of California San Diego'.
sql = '''
SELECT a.name,
       a.paper_count,
       a.citation_count,
       o.name AS "organization"
FROM author a,
     writes w,
     publication p,
     organization o,
     publication_organization po
WHERE a.aid = w.aid
  AND w.pid = p.pid
  AND p.pid = po.pid
  AND o.oid = po.oid
  AND a.paper_count < 100
  AND a.citation_count > 1000
  AND o.name = 'University of California San Diego'
  AND p.year > 2013;
'''
# Hand the query to the shared instance, then show the question to the participant.
# NOTE(review): demonstrate_asqp_rl presumably renders the two candidate tables and
# the answer widgets seen in this cell's output — confirm in asqp_rl_demo.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

name,paper_count,citation_count,organization
Rajeev Balasubramonian,54,1330,University of California San Diego
Scott Hauck,94,2977,University of California San Diego
Benjamin Taskar,88,2558,University of California San Diego
Ulfar Erlingsson,31,1053,University of California San Diego
Ratul Mahajan,66,3231,University of California San Diego
Gary J. Sullivan,30,3344,University of California San Diego
Philippe Bonnet,64,1368,University of California San Diego
Michael Bedford Taylor,33,1312,University of California San Diego
Sarang Dharmapurikar,19,1071,University of California San Diego
Adam Finkelstein,65,2923,University of California San Diego

name,paper_count,citation_count,organization
James S. Plank,78,2858,University of California San Diego
Ilkay Altintas,54,1062,University of California San Diego
Kristen Grauman,97,1535,University of California San Diego
Scott Hauck,94,2977,University of California San Diego
Alin Deutsch,76,2596,University of California San Diego
Carla E. Brodley,75,2866,University of California San Diego
Fei Sha,77,1408,University of California San Diego
Thierry Turletti,91,1856,University of California San Diego
Srilatha Manne,23,1285,University of California San Diego
Haizhou Ai,91,1004,University of California San Diego


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

### Question 3

In [6]:
# Question 3 query: names of domains that have at least one conference and are
# linked to the keyword 'Machine Learning'; GROUP BY collapses duplicate names.
sql = '''
SELECT domain.name
FROM conference,
     domain_conference,
     DOMAIN,
     domain_keyword,
     keyword
WHERE conference.cid = domain_conference.cid
  AND domain_conference.did = domain.did
  AND domain.did = domain_keyword.did
  AND domain_keyword.kid = keyword.kid
  AND keyword.keyword = 'Machine Learning'
GROUP BY domain.name;
'''
# Hand the query to the shared instance, then show the question to the participant.
# NOTE(review): demonstrate_asqp_rl presumably renders the two candidate tables and
# the answer widgets seen in this cell's output — confirm in asqp_rl_demo.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

name
Artificial Intelligence
Machine Learning & Pattern Recognition

name
Machine Learning & Pattern Recognition
Artificial Intelligence


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

### Question 4

In [7]:
# Question 4 query: names of conferences that belong to some domain and have a
# publication with citation_count > 1 written by an author attached to an
# organization; GROUP BY collapses duplicate names.
sql = '''
SELECT conference.name
FROM author,
     organization,
     writes,
     publication,
     conference,
     domain_conference,
     domain
WHERE author.oid = organization.oid
  AND author.aid = writes.aid
  AND writes.pid = publication.pid
  AND publication.cid = conference.cid
  AND conference.cid = domain_conference.cid
  AND domain_conference.did = domain.did
  AND publication.citation_count > 1
GROUP BY conference.name;
'''
# Hand the query to the shared instance, then show the question to the participant.
# NOTE(review): demonstrate_asqp_rl presumably renders the two candidate tables and
# the answer widgets seen in this cell's output — confirm in asqp_rl_demo.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

name
IWAR
ICRE
ALIFE
DICTA
EUROSYS
APPROX
GLAKES
XP
CA
EURO-PDS

name
EMNLP
TES
DAC
WADS
IWBRS
EUROCRYPT
ICDAR
MCS
SSDBM
INEX


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

In [8]:
# Last step of the study.
# NOTE(review): presumably displays the 'Finish' button seen in this cell's output
# and stores the participant's answers when it is clicked — confirm in asqp_rl_demo.
asqp_rl.save_answers_button()

Button(description='Finish', style=ButtonStyle())

Output()