In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to the demo module are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Import only the names this notebook actually uses, so it is obvious where
# they come from and the notebook namespace is not polluted by a wildcard import.
from asqp_rl_demo import AsqpInstance, demonstrate_asqp_rl

# Explanation

ASQP-RL is a novel algorithm that curates subsets of large tabular datasets using Reinforcement Learning. The following is a user study meant to evaluate the quality of the system through user feedback.
To test the system, you will be presented with a few questions. Each question consists of an SQL query and two dataframes: one is the answer obtained by running the query on the database, and the other is the answer extracted from the system. We ask that you try to identify which is which.

# Questions

In [3]:
# Single ASQP instance, created under the name 'user_study'; every question
# cell below calls asqp_rl.query_asqp(...) on this same object.
asqp_rl = AsqpInstance(name='user_study')

### Question 1

In [4]:
# Question 1: title, year and organization of publications in the 'Databases'
# domain that are linked (via publication_organization) to Tel Aviv University,
# University of Maryland or University of Michigan.
sql = '''
SELECT title,
       YEAR,
       o.name AS organization
FROM organization o,
     publication p,
     publication_organization po,
     domain_publication dp,
     DOMAIN d
WHERE p.pid = po.pid
  AND o.oid = po.oid
  AND dp.pid = p.pid
  AND dp.did = d.did
  AND d.name = 'Databases'
  AND o.name IN ('Tel Aviv University', 'University of Maryland', 'University of Michigan');
'''
# Hand the query to the shared instance and render the question; the saved
# output shows two result tables and a radio-button choice between
# 'Left: ASQP-RL, Right: DB' and 'Left: DB, Right: ASQP-RL'.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

title,year,organization
OASSIS: query driven crowd mining.,2014,Tel Aviv University
SWST: A Disk Based Index for Sliding Window Spatio-Temporal Data.,2012,University of California San Diego
SWST: A Disk Based Index for Sliding Window Spatio-Temporal Data.,2012,University of Michigan
Enabling Privacy in Provenance-Aware Workflow Systems.,2011,Tel Aviv University
The ACM PODS Alberto O. Mendelzon test-of-time award 2013.,2013,Tel Aviv University
Making interval-based clustering rank-aware.,2011,Tel Aviv University
Deriving probabilistic databases with inference ensembles.,2011,Tel Aviv University
Breaking out of the MisMatch trap.,2014,University of Michigan
A sample-and-clean framework for fast and accurate query processing on dirty data.,2014,Tel Aviv University
Using Markov Chain Monte Carlo to play Trivia.,2011,Tel Aviv University

title,year,organization
Modal Analysis with Compressive Measurements.,2013,University of Michigan
Looking at Everything in Context.,2015,University of Michigan
Provenance for Web 2.0 Data.,2012,Tel Aviv University
Question-and-answer passwords: an empirical evaluation.,1991,Tel Aviv University
Visual Analytics of Urban Environments using High-Resolution Geographic Data.,2010,Tel Aviv University
UMD Mindlab Rules Workshop Position Paper.,2005,University of Maryland
Does Growing Demand for Data Science Create New Opportunities for Information Systems?,2014,University of Maryland
Multiple Hypotheses Iterative Decoding of LDPC in the Presence of Strong Phase Noise.,2012,Tel Aviv University
Testing Satisfaction of Functional Dependencies.,1980,University of Michigan
Fast Activity Detection: Indexing for Temporal Stochastic Automaton-Based Activity Models.,2013,University of Maryland


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

### Question 2

In [5]:
# Question 2: authors with fewer than 100 papers and more than 1000 citations
# who wrote a publication after 2013 that is linked (via
# publication_organization) to 'University of California San Diego'.
# There is no DISTINCT / GROUP BY, so one row is produced per matching
# author-publication pair.
sql = '''
SELECT a.name,
       a.paper_count,
       a.citation_count,
       o.name AS "organization"
FROM author a,
     writes w,
     publication p,
     organization o,
     publication_organization po
WHERE a.aid = w.aid
  AND w.pid = p.pid
  AND p.pid = po.pid
  AND o.oid = po.oid
  AND a.paper_count < 100
  AND a.citation_count > 1000
  AND o.name = 'University of California San Diego'
  AND p.year > 2013;
'''
# Hand the query to the shared instance and render the question; the saved
# output shows two result tables and a radio-button choice between
# 'Left: ASQP-RL, Right: DB' and 'Left: DB, Right: ASQP-RL'.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

name,paper_count,citation_count,organization
Deva Ramanan,67,1492,University of California San Diego
Srilatha Manne,23,1285,University of California San Diego
Ram Zamir,90,2389,University of California San Diego
Rajeev Balasubramonian,54,1330,University of California San Diego
Benjamin Taskar,88,2558,University of California San Diego
Alin Deutsch,76,2596,University of California San Diego
Gert R. G. Lanckriet,87,2644,University of California San Diego
James Fogarty,62,1049,University of California San Diego
Thierry Turletti,91,1856,University of California San Diego
Carla E. Brodley,75,2866,University of California San Diego

name,paper_count,citation_count,organization
Ranjit Jhala,71,2930,University of California San Diego
Emre Kiciman,37,1147,University of California San Diego
Charles Elkan,85,3836,University of California San Diego
David W. McDonald,70,1561,University of California San Diego
Fei Sha,77,1408,University of California San Diego
Frederic T. Chong,83,1037,University of California San Diego
Thierry Turletti,91,1856,University of California San Diego
Xiaolan Zhang,52,1571,University of California San Diego
Javier R. Movellan,82,1615,University of California San Diego
Kenneth Zeger,94,3186,University of California San Diego


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

### Question 3

In [6]:
# Question 3: names of domains that have at least one conference and are
# tagged with the keyword 'Machine Learning'. GROUP BY is used only to
# de-duplicate domain names (no aggregate is selected).
sql = '''
SELECT domain.name
FROM conference,
     domain_conference,
     DOMAIN,
     domain_keyword,
     keyword
WHERE conference.cid = domain_conference.cid
  AND domain_conference.did = domain.did
  AND domain.did = domain_keyword.did
  AND domain_keyword.kid = keyword.kid
  AND keyword.keyword = 'Machine Learning'
GROUP BY domain.name;
'''
# Hand the query to the shared instance and render the question; the saved
# output shows two result tables and a radio-button choice between
# 'Left: ASQP-RL, Right: DB' and 'Left: DB, Right: ASQP-RL'.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

name
Artificial Intelligence
Machine Learning & Pattern Recognition

name
Artificial Intelligence
Machine Learning & Pattern Recognition


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

### Question 4

In [7]:
# Question 4: names of conferences that belong to at least one domain and have
# a publication with citation_count > 1 written by an author who has an
# organization. GROUP BY is used only to de-duplicate conference names.
sql = '''
SELECT conference.name
FROM author,
     organization,
     writes,
     publication,
     conference,
     domain_conference,
     domain
WHERE author.oid = organization.oid
  AND author.aid = writes.aid
  AND writes.pid = publication.pid
  AND publication.cid = conference.cid
  AND conference.cid = domain_conference.cid
  AND domain_conference.did = domain.did
  AND publication.citation_count > 1
GROUP BY conference.name;
'''
# Hand the query to the shared instance and render the question; the saved
# output shows two result tables and a radio-button choice between
# 'Left: ASQP-RL, Right: DB' and 'Left: DB, Right: ASQP-RL'.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

name
XMLSEC
IEEEICCI
PERSUASIVE
NLUCS
IWDM
AINA
ARCS
SEDE
CVDB
DRUMS

name
DAGM
SSDBM
PODS
ISMB
CVPR
WEBDB
CEAS
UAI
ICIP
SODA


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

### Question 5

In [8]:
# Question 5: titles of publications from 2000 to 2020 (inclusive) that are
# attached to a conference and have at least one author. GROUP BY is used only
# to de-duplicate titles.
sql = '''
SELECT publication.title
FROM author,
     conference,
     publication,
     writes
WHERE (publication.year >= 2000 and publication.year <= 2020)
  AND author.aid = writes.aid
  AND conference.cid = publication.cid
  AND publication.pid = writes.pid
GROUP BY publication.title;
'''
# Hand the query to the shared instance and render the question; the saved
# output shows two result tables and a radio-button choice between
# 'Left: ASQP-RL, Right: DB' and 'Left: DB, Right: ASQP-RL'.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

title
Convex Relaxations for Binary Image Partitioning and Perceptual Grouping.
Diversity between Neural Networks and Decision Trees for Building Multiple Classifier Systems.
Diversification and refinement in collaborative filtering recommender.
Optimizing the Recognition Rates of Unconstrained Handwritten Numerals Using Biorthogonal Spline Wavelets.
A sample-and-clean framework for fast and accurate query processing on dirty data.
The ACM PODS Alberto O. Mendelzon test-of-time award 2013.
"A quest for beauty and wealth (or, business processes for database researchers)."
Crowd-Based Data Sourcing - (Abstract).
Using Data for Systemic Financial Risk Management.
On Discriminative vs. Generative Classifiers: A comparison of logistic regression and naive Bayes.

title
Survey on Location Authentication Protocols and Spatial-Temporal Attestation Services.
Ranging and Communications with Impulse Radio Ultrawideband.
Security Efficiency Analysis of a Biometric Fuzzy Extractor for Iris Templates.
Collaborative Learning Patterns: Assisting the Development of Component-Based CSCL Applications.
Service-Oriented Device Integration for Ubiquitous Ambient Assisted Living Environments.
Effective Diagnosis of Alzheimer's Disease by Means of Distance Metric Learning.
Coding Region Prediction in Genomic Sequences Using a Combination of Digital Signal Processing Approaches.
PRIPARE: A New Vision on Engineering Privacy and Security by Design.
A Cooperative Paradigm for Fighting Information Overload.
Meandre Data-Intensive Application Infrastructure: Extreme Scalability for Cloud and/or Grid Computing.


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

### Question 6

In [9]:
# Question 6: names of domains that have at least one conference and at least
# one keyword (same joins as Question 3, without the keyword filter).
# GROUP BY is used only to de-duplicate domain names.
sql = '''
SELECT domain.name
FROM conference,
     domain_conference,
     domain,
     domain_keyword,
     keyword
WHERE conference.cid = domain_conference.cid
  AND domain_conference.did = domain.did
  AND domain.did = domain_keyword.did
  AND domain_keyword.kid = keyword.kid
GROUP BY domain.name;
'''
# Hand the query to the shared instance and render the question; the saved
# output shows two result tables and a radio-button choice between
# 'Left: ASQP-RL, Right: DB' and 'Left: DB, Right: ASQP-RL'.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

name
Natural Language & Speech
Algorithms & Theory
Hardware & Architecture
Software Engineering
Bioinformatics & Computational Biology
Machine Learning & Pattern Recognition
Security & Privacy
Information Retrieval
Human-Computer Interaction
Computer Education

name
Scientific Computing
Computer Vision
Security & Privacy
Real-Time & Embedded Systems
Human-Computer Interaction
Networks & Communications
Information Retrieval
Hardware & Architecture
Natural Language & Speech
Distributed & Parallel Computing


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

### Question 7

In [10]:
# Question 7: (author name, conference name) pairs, one row per publication
# written by an author who has an organization; no de-duplication is applied.
# NOTE(review): both selected columns are called `name`, while the saved output
# headers read `author,conference` — confirm how asqp_rl_demo labels them, or
# consider explicit aliases as in Questions 1 and 2.
sql = '''
SELECT author.name, conference.name
FROM author,
     conference,
     publication,
     writes,
     organization
WHERE author.aid = writes.aid
  AND writes.pid = publication.pid
  AND publication.cid = conference.cid
  AND author.oid = organization.oid;
'''
# Hand the query to the shared instance and render the question; the saved
# output shows two result tables and a radio-button choice between
# 'Left: ASQP-RL, Right: DB' and 'Left: DB, Right: ASQP-RL'.
asqp_rl.query_asqp(sql)
demonstrate_asqp_rl(asqp_rl)

author,conference
Jeffrey O. Kephart,POLICY
Albert C. S. Chung,MICCAI
Marc Benkert,JCDCG
Phu Chien Nguyen,INTERSPEECH
Massimo Lauria,COCO
Xiang Li,ISBI
Sushant Patnaik,PODS
David M. Pennock,SIGECOM
Satish Narayanasamy,PLDI
Chang-Joo Moon,ASIASIM

author,conference
Hongbin Zha,ICML
Tova Milo,BIRTHDAY
Andy D. Castellano-Smith,BILDMED
Takafumi Kanamori,NIPS
Philip M. Long,COLT
Jielin Pan,ICNC
Christopher D. Manning,NIPS
Julia Stoyanovich,EDBT
Julia Stoyanovich,CIDR
Sudeepa Roy,ICDT


VBox(children=(Output(), RadioButtons(options=(('Left: ASQP-RL, Right: DB', 0), ('Left: DB, Right: ASQP-RL', 1…

Button(description='Finish', style=ButtonStyle())

Output()

### Question 8

In [11]:
# TODO: Question 8 has no query yet. Either fill in the SQL below and
# uncomment the cell, or remove this cell together with its "Question 8" heading.
# sql = '''

# '''
# asqp_rl.query_asqp(sql)
# demonstrate_asqp_rl(asqp_rl)

In [12]:
# Last step of the study; in the saved run this cell displays a 'Finish' button.
# NOTE(review): presumably clicking it persists the participant's answers —
# confirm against asqp_rl_demo.
asqp_rl.save_answers_button()

Button(description='Finish', style=ButtonStyle())

Output()