In [1]:
import csv
import random


# Read the whole survey CSV into memory: one dict per row, keyed by the
# header line. newline='' is what the csv module asks for when handing it a
# file object, so that line breaks embedded in quoted fields are parsed as
# part of the field instead of being translated by the text layer first.
with open('./dataset_austin_sentences.csv', newline='') as csv_file:
    reader = csv.DictReader(csv_file)
    sentences = list(reader)

In [2]:
# Keep only the rows whose 'year' column equals the string '2016'
# (the comparison is against text, not an int).
sentences_2016 = list(filter(lambda row: row['year'] == '2016', sentences))
print('There are %d sentences in the 2016 survey' % len(sentences_2016))

There are 3005 sentences in the 2016 survey


In [3]:
from debater_python_api.api.debater_api import DebaterApi
from austin_utils import init_logger
import os

# Configure logging through the austin_utils helper.
init_logger()
# The API key comes from the environment so it never lands in the notebook;
# a missing DEBATER_API_KEY raises KeyError right here.
api_key = os.environ['DEBATER_API_KEY']
debater_api = DebaterApi(apikey=api_key)
keypoints_client = debater_api.get_keypoints_client()
# Domain name shared by every later keypoints_client call in this notebook.
domain = 'stance_demo'
# Reset step: drop the domain first (irreversible, going by the method name),
# then re-create it. Order matters: delete must come before create.
keypoints_client.delete_domain_cannot_be_undone(domain)
# NOTE(review): 'dont_split' and 'do_stance_analysis' are forwarded verbatim as
# domain parameters; their exact semantics are defined by the service — confirm
# against the client documentation before changing them.
keypoints_client.create_domain(domain, {'dont_split': True, 'do_stance_analysis': True})

2021-06-24 19:13:30,102 [INFO] keypoints_client.py 214: client calls service (delete): https://keypoint-matching-backend.debater.res.ibm.com/data
2021-06-24 19:13:31,207 [INFO] keypoints_client.py 214: client calls service (post): https://keypoint-matching-backend.debater.res.ibm.com/domains
2021-06-24 19:13:31,758 [INFO] keypoints_client.py 261: created domain: stance_demo with domain_params: {'dont_split': True, 'do_stance_analysis': True}


In [4]:
from austin_utils import print_top_and_bottom_k_sentences

def get_top_quality_sentences(sentences, top_k, topic):
    """Rank `sentences` by argument-quality score for `topic`; return the best `top_k`.

    Args:
        sentences: sequence of row dicts; each must have a 'text' entry.
        top_k: how many of the highest-scoring rows to return.
        topic: topic string paired with every sentence when scoring.

    Returns:
        The first `top_k` rows of `sentences` after sorting by score, highest
        first. Rows are returned as-is (the same dict objects that came in).

    Side effects:
        Calls the argument-quality client obtained from the notebook-level
        `debater_api`, and prints a summary of the ranking through
        `print_top_and_bottom_k_sentences` with k fixed at 10.
    """
    quality_client = debater_api.get_argument_quality_client()

    # One {'sentence', 'topic'} request per input row, in input order.
    scoring_requests = []
    for row in sentences:
        scoring_requests.append({'sentence': row['text'], 'topic': topic})
    scores = quality_client.run(scoring_requests)

    # Pair each row with its score positionally, then order by score descending.
    ranked_pairs = list(zip(sentences, scores))
    ranked_pairs.sort(key=lambda pair: pair[1], reverse=True)
    ranked_sentences = [row for row, _ in ranked_pairs]

    print_top_and_bottom_k_sentences(ranked_sentences, 10)
    return ranked_sentences[:top_k]

# Score every 2016 sentence against the topic and keep the 1000 best-scoring rows.
sentences_2016_top_1000_aq = get_top_quality_sentences(
    sentences=sentences_2016,
    top_k=1000,
    topic='Austin is a great place to live',
)

ArgumentQualityClient: 100%|█████████▉| 3000/3005 [00:05<00:00, 559.06it/s]2021-06-24 19:13:37,715 [INFO] argument_quality_client.py 24: argument_quality_client.run = 5946.797132492065ms.


Top 10 quality sentences: 
	- Affordable housing is essential to keep Austin diverse, welcoming, and growing in the
	  ways that reflect the progressive ideals of this city and the future generations.
	- Austin has a unique charm, with high quality customer service,  and great quality of
	  life that will quickly deteriorate with this rapid urban sprawl.
	- We need to make sure our city continues to be an example across the country for
	  unwavering progress, with regards to energy, policing, fair housing and employment,
	  non-discrimination policy, and creating a sound infrastructure to accommodate the city's
	  rapid growth.
	- We need a coordinated effort by many agencies to find housing, training, rehab,
	  healthcare and work for our homeless.
	- Affordability and diversity (in culture and age groups) are extremely important to
	  keeping Austin the vibrant, egalitarian city it is.
	- KEEP AUSTIN QUALITY OF LIFE BY INVESTING IN TRAILS, BIKE LANES, SIDEWALKS, PARKS, THESE
	  ARE I

In [5]:
def run_kpa(sentences_ids, run_params):
    """Start a key point analysis job on the notebook's domain and return its result.

    Args:
        sentences_ids: ids of the already-uploaded comments to analyse.
        run_params: dict of run parameters forwarded unchanged to the service.

    Returns:
        Whatever the job future's `get_result` returns for the finished job.

    Uses the notebook-level `keypoints_client` and `domain`.
    """
    kpa_job = keypoints_client.start_kp_analysis_job(
        domain=domain,
        comments_ids=sentences_ids,
        run_params=run_params,
    )
    # NOTE(review): `polling_timout_secs` (sic) is the keyword exactly as this
    # notebook passes it; check the installed client's signature before
    # "correcting" the spelling, since renaming it changes the call.
    return kpa_job.get_result(high_verbosity=True, polling_timout_secs=5)

In [6]:
# Split the selected rows into two parallel lists (same order): ids and texts.
sentences_ids = [row['id'] for row in sentences_2016_top_1000_aq]
sentences_texts = [row['text'] for row in sentences_2016_top_1000_aq]

# Upload the comments into the domain.
keypoints_client.upload_comments(
    domain=domain,
    comments_ids=sentences_ids,
    comments_texts=sentences_texts,
    dont_split=True,
)

# Block until the service reports that the uploaded comments are processed.
keypoints_client.wait_till_all_comments_are_processed(domain=domain)

2021-06-24 19:13:37,733 [INFO] keypoints_client.py 284: uploading 1000 comments in batches
2021-06-24 19:13:37,734 [INFO] keypoints_client.py 214: client calls service (post): https://keypoint-matching-backend.debater.res.ibm.com/comments
2021-06-24 19:13:38,869 [INFO] keypoints_client.py 301: uploaded 1000 comments, out of 1000
2021-06-24 19:13:38,870 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/comments
2021-06-24 19:13:39,628 [INFO] keypoints_client.py 313: domain: stance_demo, comments status: {'processed_comments': 0, 'pending_comments': 1000, 'processed_sentences': 0}
2021-06-24 19:13:49,632 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/comments
2021-06-24 19:13:50,195 [INFO] keypoints_client.py 313: domain: stance_demo, comments status: {'processed_comments': 1000, 'pending_comments': 0, 'processed_sentences': 1000}


In [7]:
from austin_utils import print_results

# Key point analysis restricted to the 'pos' stance.
pos_result = run_kpa(
    sentences_ids=sentences_ids,
    run_params={'n_top_kps': 20, 'stances_to_run': ['pos'], 'stances_threshold': 0.5},
)
print_results(pos_result, title='Positive results', n_sentences_per_kp=2)

2021-06-24 19:13:50,202 [INFO] keypoints_client.py 214: client calls service (post): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:13:51,165 [INFO] keypoints_client.py 375: started a kp analysis job - domain: stance_demo, job_id: 60d4af3f93247fbd991aaf3a
2021-06-24 19:13:51,166 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:13:51,680 [INFO] keypoints_client.py 548: job_id 60d4af3f93247fbd991aaf3a is pending
ArgumentQualityClient: 100%|██████████| 3005/3005 [00:20<00:00, 559.06it/s]2021-06-24 19:13:56,685 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:13:57,292 [INFO] keypoints_client.py 552: job_id 60d4af3f93247fbd991aaf3a is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 0, 'total_batches': 1, 'batch_size': 2000}}


Stage 1/1: |--------------------------------------------------| 0.0% Complete



2021-06-24 19:14:02,296 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:14:03,100 [INFO] keypoints_client.py 555: job_id 60d4af3f93247fbd991aaf3a is done, returning result


Positive results coverage: 0.00
Positive results key points:


In [8]:
# Key point analysis for the 'neg' and 'sug' stances, with the same limits as
# the positive run.
neg_result = run_kpa(sentences_ids, {'n_top_kps': 20, 'stances_to_run': ['neg', 'sug'], 'stances_threshold': 0.5})
# The title must describe this run: it was previously 'Positive results'
# (copied from the cell above), which mislabelled the printed report.
print_results(neg_result, n_sentences_per_kp=2, title='Negative results')

2021-06-24 19:14:03,111 [INFO] keypoints_client.py 214: client calls service (post): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:14:04,139 [INFO] keypoints_client.py 375: started a kp analysis job - domain: stance_demo, job_id: 60d4af4c93247fbd991aaf3d
2021-06-24 19:14:04,140 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:14:04,739 [INFO] keypoints_client.py 548: job_id 60d4af4c93247fbd991aaf3d is pending
2021-06-24 19:14:09,743 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:14:10,368 [INFO] keypoints_client.py 552: job_id 60d4af4c93247fbd991aaf3d is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 0, 'total_batches': 20, 'batch_size': 2000}}


Stage 1/1: |--------------------------------------------------| 0.0% Complete



2021-06-24 19:14:15,374 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:14:15,954 [INFO] keypoints_client.py 552: job_id 60d4af4c93247fbd991aaf3d is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 0, 'total_batches': 20, 'batch_size': 2000}}


Stage 1/1: |--------------------------------------------------| 0.0% Complete



2021-06-24 19:14:20,958 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:14:21,560 [INFO] keypoints_client.py 552: job_id 60d4af4c93247fbd991aaf3d is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 0, 'total_batches': 20, 'batch_size': 2000}}


Stage 1/1: |--------------------------------------------------| 0.0% Complete



2021-06-24 19:14:26,562 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:14:27,174 [INFO] keypoints_client.py 552: job_id 60d4af4c93247fbd991aaf3d is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 2, 'total_batches': 20, 'batch_size': 2000}}


Stage 1/1: |█████---------------------------------------------| 10.0% Complete



2021-06-24 19:14:32,176 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:14:32,839 [INFO] keypoints_client.py 552: job_id 60d4af4c93247fbd991aaf3d is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 3, 'total_batches': 20, 'batch_size': 2000}}


Stage 1/1: |███████-------------------------------------------| 15.0% Complete



2021-06-24 19:14:37,843 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:14:38,468 [INFO] keypoints_client.py 552: job_id 60d4af4c93247fbd991aaf3d is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 8, 'total_batches': 20, 'batch_size': 2000}}


Stage 1/1: |████████████████████------------------------------| 40.0% Complete



2021-06-24 19:14:43,471 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:14:44,072 [INFO] keypoints_client.py 552: job_id 60d4af4c93247fbd991aaf3d is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 11, 'total_batches': 20, 'batch_size': 2000}}


Stage 1/1: |███████████████████████████-----------------------| 55.0% Complete



2021-06-24 19:14:49,076 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:14:49,671 [INFO] keypoints_client.py 552: job_id 60d4af4c93247fbd991aaf3d is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 14, 'total_batches': 20, 'batch_size': 2000}}


Stage 1/1: |███████████████████████████████████---------------| 70.0% Complete



2021-06-24 19:14:54,676 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:14:55,292 [INFO] keypoints_client.py 552: job_id 60d4af4c93247fbd991aaf3d is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 15, 'total_batches': 20, 'batch_size': 2000}}


Stage 1/1: |█████████████████████████████████████-------------| 75.0% Complete



2021-06-24 19:15:00,298 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:15:00,875 [INFO] keypoints_client.py 552: job_id 60d4af4c93247fbd991aaf3d is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 18, 'total_batches': 20, 'batch_size': 2000}}


Stage 1/1: |█████████████████████████████████████████████-----| 90.0% Complete



2021-06-24 19:15:05,881 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:15:06,496 [INFO] keypoints_client.py 552: job_id 60d4af4c93247fbd991aaf3d is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 19, 'total_batches': 20, 'batch_size': 2000}}


Stage 1/1: |███████████████████████████████████████████████---| 95.0% Complete



2021-06-24 19:15:11,501 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:15:12,235 [INFO] keypoints_client.py 552: job_id 60d4af4c93247fbd991aaf3d is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 20, 'total_batches': 20, 'batch_size': 2000}}


Stage 1/1: |██████████████████████████████████████████████████| 100.0% Complete




2021-06-24 19:15:17,238 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:15:17,974 [INFO] keypoints_client.py 552: job_id 60d4af4c93247fbd991aaf3d is running, progress: {'total_stages': 1, 'stage_1': {'inferred_batches': 20, 'total_batches': 20, 'batch_size': 2000}}


Stage 1/1: |██████████████████████████████████████████████████| 100.0% Complete




2021-06-24 19:15:22,978 [INFO] keypoints_client.py 214: client calls service (get): https://keypoint-matching-backend.debater.res.ibm.com/kp_extraction
2021-06-24 19:15:24,683 [INFO] keypoints_client.py 555: job_id 60d4af4c93247fbd991aaf3d is done, returning result


Positive results coverage: 43.99
Positive results key points:
83 - Traffic congestion needs major improvement
	- Austin need improved transportation infrastructure to alleviate current traffic and
	  accommodate rapid population growth.
	- Fast population growth is the cities biggest problem in areas such as congestion and
	  expensiveness.
75 - We need affordable housing.
	- Affordable housing is crucial, & keeping seniors in their homes is part of that challenge!
	- We need better housing solutions for low income and homeless individuals.
41 - Integrated transportation is critical.
	- a public transit train system is a necessity
	- Steiner Ranch area needs more access to public transportation (bus, train, etc>) and
	  taxi/ridesharing  to/from downtown and airport
28 - UTILITY RATES & PROPERTY TAXES ARE OUTRAGEOUS
	- PROPERTY TAXES ARE TOO HIGH.
	- PROPERTY TAXES ARE OUT OF CONTROL.
19 - THE HOMELESS POPULATION NEEDS MORE HELP/ATTENTION
	- HOMELESS SITUATION NEEDS TO BE ADDRESSED IN 

In [9]:
def set_stance_to_result(result, stance):
    """Label every key point matching in `result` with `stance`.

    Args:
        result: dict holding a 'keypoint_matchings' list of dict-like entries.
        stance: value stored under each entry's 'stance' key.

    Returns:
        The same `result` object; it is modified in place, not copied.
    """
    kp_matchings = result['keypoint_matchings']
    for kp_matching in kp_matchings:
        kp_matching['stance'] = stance
    return result

In [10]:
# Tag each result's key points with a stance label so they stay
# distinguishable once merged. neg_result was requested with both 'neg' and
# 'sug' stances; everything in it is labelled 'neg' here.
pos_result = set_stance_to_result(result=pos_result, stance='pos')
neg_result = set_stance_to_result(result=neg_result, stance='neg')

In [11]:
def sort_matchings(result):
    """Sort `result['keypoint_matchings']` in place, most matches first.

    Each entry is ranked by the length of its 'matching' list. Entries with
    the same count keep their existing relative order. Returns None.
    """
    def match_count(kp_matching):
        return len(kp_matching['matching'])

    result['keypoint_matchings'].sort(key=match_count, reverse=True)
    
def merge_two_results(result_con, result_pro):
    """Combine the key point matchings of two results into one new result.

    Args:
        result_con: dict with a 'keypoint_matchings' list.
        result_pro: dict with a 'keypoint_matchings' list.

    Returns:
        A new dict containing only 'keypoint_matchings': the entries of
        `result_con` followed by those of `result_pro`, then sorted with
        `sort_matchings` (largest 'matching' list first). The input lists are
        not modified, but the entry objects themselves are shared.
    """
    combined = result_con['keypoint_matchings'] + result_pro['keypoint_matchings']
    merged = {'keypoint_matchings': combined}
    sort_matchings(merged)
    return merged

In [12]:
# Combine both stance-tagged results into one list ordered by match count.
# NOTE(review): merge_two_results names its parameters (result_con, result_pro)
# while this call passes (pos_result, neg_result); the function sorts the
# concatenated list, so argument order only affects how ties are ordered.
merged_result = merge_two_results(pos_result, neg_result)

In [13]:
# Print the merged key points, showing two example sentences for each.
print_results(merged_result, n_sentences_per_kp=2, title='Merged result')

Merged result coverage: 43.12
Merged result key points:
83 - Traffic congestion needs major improvement - neg
	- Austin need improved transportation infrastructure to alleviate current traffic and
	  accommodate rapid population growth.
	- Fast population growth is the cities biggest problem in areas such as congestion and
	  expensiveness.
75 - We need affordable housing. - neg
	- Affordable housing is crucial, & keeping seniors in their homes is part of that challenge!
	- We need better housing solutions for low income and homeless individuals.
41 - Integrated transportation is critical. - neg
	- a public transit train system is a necessity
	- Steiner Ranch area needs more access to public transportation (bus, train, etc>) and
	  taxi/ridesharing  to/from downtown and airport
28 - UTILITY RATES & PROPERTY TAXES ARE OUTRAGEOUS - neg
	- PROPERTY TAXES ARE TOO HIGH.
	- PROPERTY TAXES ARE OUT OF CONTROL.
19 - THE HOMELESS POPULATION NEEDS MORE HELP/ATTENTION - neg
	- HOMELESS SITUATION N