In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to local modules (e.g. the
# asqp_rl_demo module imported below) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Import only the names this notebook actually uses rather than `import *`,
# so it is obvious where each name comes from and nothing else leaks into the
# notebook namespace.
from asqp_rl_demo import AsqpInstance, demonstrate_asqp_rl

# Explanation

ASQP-RL is a novel algorithm that curates subsets of large tabular datasets using Reinforcement Learning. The following user study is meant to test the quality of the system using user feedback.
To test the system, you will be presented with a few questions. Each question consists of an SQL query and two candidate dataframes: one is the answer obtained by running the query, and the other is extracted from the system. We ask that you try to identify which is which.

# Questions

In [3]:
# Single shared instance: every question cell below queries it and passes it
# to demonstrate_asqp_rl(), and the Results cell calls reveal_results() on it.
asqp_rl = AsqpInstance()

### Question 1

In [4]:
# Question 1: title, year and organization name of publications in the
# 'Databases' domain that are linked to one of three listed universities.
sql = '''
SELECT title,
       YEAR,
       o.name AS organization
FROM organization o,
     publication p,
     publication_organization po,
     domain_publication dp,
     DOMAIN d
WHERE p.pid = po.pid
  AND o.oid = po.oid
  AND dp.pid = p.pid
  AND dp.did = d.did
  AND d.name = 'Databases'
  AND o.name IN ('Tel Aviv University', 'University of Maryland', 'University of Michigan');
'''
# Submit the query to the shared instance, then render this question.
# Judging by the saved output, the second call shows two result tables plus
# the answer radio buttons and a "Reveal answers" button - TODO confirm.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

title,year,organization
Using Data for Systemic Financial Risk Management.,2011,University of Maryland
Uncertainty in Crowd Data Sourcing Under Structural Constraints.,2014,Tel Aviv University
Methods for boosting recommender systems.,2011,Tel Aviv University
Enabling Privacy in Provenance-Aware Workflow Systems.,2011,Tel Aviv University
A propagation model for provenance views of public/private workflows.,2013,Tel Aviv University
Using Markov Chain Monte Carlo to play Trivia.,2011,Tel Aviv University
Asking the Right Questions in Crowd Data Sourcing.,2012,Tel Aviv University
Making interval-based clustering rank-aware.,2011,Tel Aviv University
Understanding Local Structure in Ranked Datasets.,2013,Tel Aviv University
A sample-and-clean framework for fast and accurate query processing on dirty data.,2014,Tel Aviv University

title,year,organization
Non-contractible Factors as Determinants of Electronic Market Adoption.,2002,University of Maryland
Report on the IBM data security study.,1974,University of Michigan
Challenges and Opportunities with Big Data.,2012,University of Michigan
k-Connectivity in Secure Wireless Sensor Networks with Physical Link Constraints - The On/Off Channel Model.,2012,University of Maryland
From Conceptual Modeler to University President.,1997,University of Michigan
Proceedings of the Third Conference on Uncertainty in Artificial Intelligence (1987).,2013,University of Maryland
An Information Model for Human Genome Map Representation and Assembly.,1993,University of Michigan
Optimal Packet Scheduling in an Energy Harvesting Communication System.,2010,University of Maryland
Review of Spatial Databases and Geographic Information Systems.,2009,University of Maryland
Efficient Evaluation of Radial Queries using the Target Tree.,2005,University of Michigan


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Reveal answers', style=ButtonStyle())

Output()

### Question 2

In [5]:
# Question 2: authors with fewer than 100 papers and more than 1000 citations
# who wrote a publication after 2013 that is linked to
# 'University of California San Diego'.
# The query has no DISTINCT/GROUP BY, so an author is returned once per
# matching (publication, organization link) row.
sql = '''
SELECT a.name,
       a.paper_count,
       a.citation_count,
       o.name AS "organization"
FROM author a,
     writes w,
     publication p,
     organization o,
     publication_organization po
WHERE a.aid = w.aid
  AND w.pid = p.pid
  AND p.pid = po.pid
  AND o.oid = po.oid
  AND a.paper_count < 100
  AND a.citation_count > 1000
  AND o.name = 'University of California San Diego'
  AND p.year > 2013;
'''
# Submit the query to the shared instance, then render this question.
# Judging by the saved output, the second call shows two result tables plus
# the answer radio buttons and a "Reveal answers" button - TODO confirm.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

name,paper_count,citation_count,organization
Joshua B. Tenenbaum,75,1319,University of California San Diego
Constantinos Daskalakis,99,1166,University of California San Diego
Alex C. Snoeren,82,2506,University of California San Diego
Alin Deutsch,76,2596,University of California San Diego
Scott Hauck,94,2977,University of California San Diego
Thomas L. Griffiths,74,1490,University of California San Diego
Gary J. Sullivan,30,3344,University of California San Diego
Javier R. Movellan,82,1615,University of California San Diego
Ranjit Jhala,71,2930,University of California San Diego
Hovav Shacham,63,3659,University of California San Diego

name,paper_count,citation_count,organization
Thierry Turletti,91,1856,University of California San Diego
Sanjoy Dasgupta,70,1318,University of California San Diego
James S. Plank,78,2858,University of California San Diego
Haizhou Ai,91,1004,University of California San Diego
John Kubiatowicz,82,6934,University of California San Diego
Myron Flickner,38,5584,University of California San Diego
Scott Hauck,94,2977,University of California San Diego
Curt Schurgers,47,1120,University of California San Diego
Olga Sorkine-Hornung,77,1775,University of California San Diego
Alex C. Snoeren,82,2506,University of California San Diego


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Reveal answers', style=ButtonStyle())

Output()

### Question 3

In [6]:
# Question 3: names of domains that are tagged with the keyword
# 'Machine Learning' and have at least one conference. The conference join
# only acts as an existence filter (no conference column is selected), and
# GROUP BY domain.name collapses the result to one row per domain name.
sql = '''
SELECT domain.name
FROM conference,
     domain_conference,
     DOMAIN,
     domain_keyword,
     keyword
WHERE conference.cid = domain_conference.cid
  AND domain_conference.did = domain.did
  AND domain.did = domain_keyword.did
  AND domain_keyword.kid = keyword.kid
  AND keyword.keyword = 'Machine Learning'
GROUP BY domain.name;
'''
# Submit the query to the shared instance, then render this question.
# Judging by the saved output, the second call shows two result tables plus
# the answer radio buttons and a "Reveal answers" button - TODO confirm.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

name
Artificial Intelligence
Machine Learning & Pattern Recognition

name
Artificial Intelligence
Machine Learning & Pattern Recognition


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Reveal answers', style=ButtonStyle())

Output()

### Question 4

In [7]:
# Question 4: names of conferences that have at least one publication with
# citation_count > 1, where the publication has an author attached to an
# organization and the conference is linked to a domain. The organization and
# domain joins only act as existence filters, and GROUP BY conference.name
# collapses the result to one row per conference name.
sql = '''
SELECT conference.name
FROM author,
     organization,
     writes,
     publication,
     conference,
     domain_conference,
     domain
WHERE author.oid = organization.oid
  AND author.aid = writes.aid
  AND writes.pid = publication.pid
  AND publication.cid = conference.cid
  AND conference.cid = domain_conference.cid
  AND domain_conference.did = domain.did
  AND publication.citation_count > 1
GROUP BY conference.name
'''
# Submit the query to the shared instance, then render this question.
# Judging by the saved output, the second call shows two result tables plus
# the answer radio buttons and a "Reveal answers" button - TODO confirm.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

name
DMKD
ISOOMS
OGAI
IFIP12
HT
IWDM
PG
MOBILITY
ACMICEC
CMG

name
PODS
WIDM
PDIS
VISUALIZATION
TES
EEXTT
ICML
ESA
COLT
ECCV


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Reveal answers', style=ButtonStyle())

Output()

### TODO: prepare the sample as a DataFrame and let users query it directly

# Results

In [9]:
# Report how many questions were answered correctly, using the state held by
# the shared asqp_rl instance.
# NOTE(review): the saved output reports "out of 2 questions" although four
# questions are asked above, and the execution count jumps from In [7] to
# In [9]. Presumably only answered questions are counted - confirm, and
# re-run with Restart Kernel -> Run All before sharing the notebook.
asqp_rl.reveal_results()

You were correct in 0 out of 2 questions
