# Austin tutorial.
In this tutorial we use a community survey from Austin, Texas, to demonstrate Key Point Analysis (KPA) on free-text comments.

## 1. KPA on a random sample of 1000 sentences from 2016.

### 1.A Read a random sample of 1000 sentences from the 2016 comments.

In [None]:
import csv
import random


# Load every row of the survey CSV as a dict keyed by the header names.
# newline='' is what the csv module documents for files handed to a reader;
# without it, newlines embedded inside quoted fields can be mis-parsed.
with open('./dataset_austin_sentences.csv', newline='') as csv_file:
    reader = csv.DictReader(csv_file)
    sentences = list(reader)

# Keep only the rows whose 'Year' column is the string '2016'.
sentences_2016 = [sentence for sentence in sentences if sentence['Year'] == '2016']

# Fixed seed so the same sample is drawn on every run.
random.seed(0)
# Cap the sample size at the population size: random.sample raises ValueError
# when asked for more items than are available.
random_sample_sentences_2016 = random.sample(sentences_2016, min(1000, len(sentences_2016)))

### 1.B Run KPA on the random sample

In [None]:
from debater_python_api.api.debater_api import DebaterApi
from debater_python_api.api.clients.keypoints_client import KpAnalysisUtils 


# Name of the KPA domain used by every upload and analysis job below.
domain = 'kp_based_survey_example'

# Initialise logging through the KPA utility class.
KpAnalysisUtils.init_logger()

# 'API_KEY' looks like a placeholder -- put your own key here.
api_key = 'API_KEY'
debater_api = DebaterApi(apikey=api_key)
keypoints_client = debater_api.get_keypoints_client()

In [None]:
def run_kpa(sentences):
    """Upload `sentences` to the tutorial's KPA domain and analyse them.

    Args:
        sentences: sequence of mappings; each one is read through its
            'texts' and 'ids' keys.

    Returns:
        Whatever `get_result` yields for the finished analysis job.
    """
    texts = [row['texts'] for row in sentences]
    ids = [row['ids'] for row in sentences]

    # dont_split=True: the inputs are already single sentences (presumably
    # this stops the service from splitting them further -- confirm).
    keypoints_client.upload_comments(
        domain=domain,
        comments_ids=ids,
        comments_texts=texts,
        dont_split=True,
    )
    # Wait for the uploaded comments to be processed before starting a job.
    keypoints_client.wait_till_all_comments_are_processed(domain)

    # The job is started only for the ids uploaded above.
    run_params = {'n_top_kps': 20}
    job = keypoints_client.start_kp_analysis_job(
        domain=domain,
        comments_ids=ids,
        run_params=run_params,
    )
    # The keyword really is spelled `polling_timout_secs` in this call.
    return job.get_result(high_verbosity=True, polling_timout_secs=5)

In [None]:
from demo_utils import print_results

# Analyse the random 2016 sample, then print the result with
# n_sentences_per_kp=2 under the title 'Random sample 2016'.
kpa_result_random_1000_2016 = run_kpa(random_sample_sentences_2016)
print_results(
    kpa_result_random_1000_2016,
    n_sentences_per_kp=2,
    title='Random sample 2016',
)

### Improve coverage by taking highest quality sentences
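Instead of a random sample, we score every 2016 sentence with the argument-quality service and run KPA on the 1000 highest-scoring sentences.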

In [None]:
arg_quality_client = debater_api.get_argument_quality_client()

# Score every 2016 sentence against the fixed topic 'Austin'.
arg_quality_scores = arg_quality_client.run([
    {'sentence': sentence['texts'], 'topic': 'Austin'}
    for sentence in sentences_2016
])

# Pair each score with its sentence and order the pairs by score, highest
# first. Sorting on the score alone (key=...) means tied scores never fall
# back to comparing the sentence dicts.
sorted_aq_sentences_2016 = [
    sentence
    for _score, sentence in sorted(
        zip(arg_quality_scores, sentences_2016),
        key=lambda scored: scored[0],
        reverse=True,
    )
]

# The first 1000 entries are the highest-scoring sentences.
top_aq_sentences_2016 = sorted_aq_sentences_2016[:1000]

Show the 10 highest- and 10 lowest-scoring sentences by argument quality (AQ).

In [None]:
# The list is sorted best-first, so the head holds the highest-scoring
# sentences and the tail the lowest-scoring ones.
top_10_sentences = sorted_aq_sentences_2016[:10]
bottom_10_sentences = sorted_aq_sentences_2016[-10:]

# Print each group under its label, one sentence text per line.
print('top_10_sentences: ')
print('\n'.join(entry['texts'] for entry in top_10_sentences))
print('\n\nbottom_10_sentences: ')
print('\n'.join(entry['texts'] for entry in bottom_10_sentences))

In [None]:
# Same analysis as for the random sample, this time on the 1000
# highest-scoring 2016 sentences.
kpa_result_top_aq_1000_2016 = run_kpa(top_aq_sentences_2016)
print_results(
    kpa_result_top_aq_1000_2016,
    n_sentences_per_kp=2,
    title='Top aq 2016',
)

In [None]:
def run_kpa(sentences):
    """Upload `sentences`, analyse them with 0.95 thresholds, and report the job id.

    Args:
        sentences: sequence of mappings; each one is read through its
            'texts' and 'ids' keys.

    Returns:
        A 2-tuple: the value `get_result` yields for the finished job,
        followed by the job's id from `get_job_id`.
    """
    texts = [row['texts'] for row in sentences]
    ids = [row['ids'] for row in sentences]

    # dont_split=True: the inputs are already single sentences (presumably
    # this stops the service from splitting them further -- confirm).
    keypoints_client.upload_comments(
        domain=domain,
        comments_ids=ids,
        comments_texts=texts,
        dont_split=True,
    )
    # Wait for the uploaded comments to be processed before starting a job.
    keypoints_client.wait_till_all_comments_are_processed(domain)

    # Same key point count as before, with both thresholds set to 0.95.
    run_params = {
        'n_top_kps': 20,
        'clustering_threshold': 0.95,
        'mapping_threshold': 0.95,
    }
    job = keypoints_client.start_kp_analysis_job(
        domain=domain,
        comments_ids=ids,
        run_params=run_params,
    )
    # The keyword really is spelled `polling_timout_secs` in this call.
    result = job.get_result(high_verbosity=True, polling_timout_secs=5)
    return result, job.get_job_id()

In [None]:
# run_kpa now returns (result, job_id); keep the job id for later reuse.
(kpa_result_top_aq_1000_2016,
 kpa_top_aq_1000_2016_job_id) = run_kpa(top_aq_sentences_2016)
print_results(
    kpa_result_top_aq_1000_2016,
    n_sentences_per_kp=2,
    title='Top aq 2016',
)

Show one key point (KP) together with its top 5 and bottom 5 matching sentences.

In [None]:
from demo_utils import print_top_and_bottom_matches_for_kp


# Inspect a single key point from the 2016 result. The two 5s are presumably
# the number of top and bottom matches to print -- confirm against demo_utils.
print_top_and_bottom_matches_for_kp(
    kpa_result_top_aq_1000_2016,
    'Traffic congestion needs major improvement',
    5,
    5,
)

In [None]:
# Keep only the rows whose 'Year' column is the string '2017'.
sentences_2017 = [sentence for sentence in sentences if sentence['Year'] == '2017']

# Score every 2017 sentence against the fixed topic 'Austin'
# (this overwrites the scores computed for 2016).
arg_quality_scores = arg_quality_client.run([
    {'sentence': sentence['texts'], 'topic': 'Austin'}
    for sentence in sentences_2017
])

# Pair each score with its sentence and order the pairs by score, highest
# first. Sorting on the score alone (key=...) means tied scores never fall
# back to comparing the sentence dicts.
sorted_aq_sentences_2017 = [
    sentence
    for _score, sentence in sorted(
        zip(arg_quality_scores, sentences_2017),
        key=lambda scored: scored[0],
        reverse=True,
    )
]

# The first 1000 entries are the highest-scoring sentences.
top_aq_sentences_2017 = sorted_aq_sentences_2017[:1000]

In [None]:
def run_kpa(sentences, key_points_by_job_id=None):
    """Upload `sentences` and analyse them, optionally passing on an earlier job's id.

    Args:
        sentences: sequence of mappings; each one is read through its
            'texts' and 'ids' keys.
        key_points_by_job_id: forwarded unchanged to `start_kp_analysis_job`
            under the same keyword; defaults to None.

    Returns:
        A 2-tuple: the value `get_result` yields for the finished job,
        followed by the job's id from `get_job_id`.
    """
    texts = [row['texts'] for row in sentences]
    ids = [row['ids'] for row in sentences]

    # dont_split=True: the inputs are already single sentences (presumably
    # this stops the service from splitting them further -- confirm).
    keypoints_client.upload_comments(
        domain=domain,
        comments_ids=ids,
        comments_texts=texts,
        dont_split=True,
    )
    # Wait for the uploaded comments to be processed before starting a job.
    keypoints_client.wait_till_all_comments_are_processed(domain)

    run_params = {
        'n_top_kps': 20,
        'clustering_threshold': 0.95,
        'mapping_threshold': 0.95,
    }
    job = keypoints_client.start_kp_analysis_job(
        domain=domain,
        comments_ids=ids,
        run_params=run_params,
        key_points_by_job_id=key_points_by_job_id,
    )
    # The keyword really is spelled `polling_timout_secs` in this call.
    result = job.get_result(high_verbosity=True, polling_timout_secs=5)
    return result, job.get_job_id()

In [None]:
# Analyse the top 2017 sentences, passing the 2016 job id as
# key_points_by_job_id. The returned job id is not needed here.
kpa_result_top_aq_1000_2017, _ = run_kpa(
    top_aq_sentences_2017,
    kpa_top_aq_1000_2016_job_id,
)
print_results(
    kpa_result_top_aq_1000_2017,
    n_sentences_per_kp=2,
    title='Top aq 2017, using 2016 key points',
)

In [None]:
# Compare the 2016 and 2017 results, labelled '2016' and '2017'.
# NOTE(review): compare_results is neither imported nor defined in the cells
# shown here, so this call would raise NameError as written. Presumably it
# should be imported from demo_utils like print_results -- confirm and add
# the import.
compare_results(kpa_result_top_aq_1000_2016, '2016', kpa_result_top_aq_1000_2017, '2017')