In [1]:
import csv
datafile = 'data/write_in_responses.tsv'

# Load the header row and every response row from the tab-separated export.
lines = []
# newline='' hands newline handling to the csv module, so a quoted field that
# contains an embedded line break is parsed as one field instead of being split.
with open(datafile, newline='') as f:
    reader = csv.reader(f, delimiter='\t')
    header = next(reader)
    # Every row is expected to have one cell per header column.
    expected_cols = len(header)
    for line in reader:
        # Report the width of any row that does not line up with the header;
        # the row is still kept.
        if len(line) != expected_cols:
            print(len(line))
        lines.append(line)

In [2]:
import re
# Column headers have the form "Question <number> - <question text>".
pattern = 'Question ([0-9]+) - (.+)'
question_numbers = []
questions = []
for elem in header:
    match = re.search(pattern, elem)
    if match is None:
        # Fail with the offending header instead of an opaque IndexError.
        raise ValueError(f'Unexpected column header: {elem!r}')
    number, text = match.groups()
    # The number stays a string: it is used as a dictionary key further down.
    question_numbers.append(number)
    # The question text is free-form prose, so it is stored as-is.
    questions.append(text)

print(question_numbers)
print(questions)

['7', '9', '10', '11', '12', '13', '14', '17', '18', '19', '20', '21', '23']
['If you do have an accessibility need(s) or impairment(s), how do you manage the impact it has on how you receive information in poster or oral presentations? ', 'When attending a poster session, what is your goal in receiving information? Please rate your priority in learning about each of the of the following aspects of the study on a scale of 1 to 5:', 'Traditional layout: Please rate how this poster design impacts your ability to gain information on a scale from 1 to 5. ', '#BetterPoster layout: Please rate how this poster design impacts your ability to gain information on a scale from 1 to 5. ', 'Infographic layout eaxample 1: Please rate how this poster design impacts your ability to gain information on a scale from 1 to 5.', 'Infographic layout eaxample 2: Please rate how this poster design impacts your ability to gain information on a scale from 1 to 5. ', 'What improvements, if any, would you like to

In [3]:
from collections import defaultdict, namedtuple

# Lightweight record pairing a free-text answer with the row it came from.
Response = namedtuple('Response', ['row_id', 'text'])

# Maps a question number to every non-empty answer given for it.  The row
# index is kept with the text so that answers can later be correlated with the
# data in the other questions (e.g., if the person had a visual limitation,
# are they more likely to request a visual accommodation?).
question_answers = defaultdict(list)
for row_id, line in enumerate(lines):
    for col, resp in enumerate(line):
        if not resp:
            # Empty cell: this respondent wrote nothing for this question.
            continue
        answer = Response(row_id=row_id, text=resp)
        question_answers[question_numbers[col]].append(answer)

question_answers.keys()

dict_keys(['19', '20', '10', '11', '14', '18', '21', '12', '13', '23', '7', '17', '9'])

In [4]:
from collections import Counter
import string

from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
# Custom stop-word set consulted by prune_stop_tokens().
# NOTE: entries that contain an apostrophe can only match tokens that still
# carry their punctuation; sliding_ngram_windows() strips punctuation by
# default, which turns e.g. "it's" into "its" before the lookup happens.
stops = {
    "a",
    "about",
    "again",
    "am",
    "an",
    "are",
    "and",
    "as",
    "at",
    "be",
    "both",
    "but",
    "by",
    "can",
    "could",
    "did",
    "do",
    "does",
    "doing",
    "during",
    "each",
    "for",
    "from",
    "further",
    "had",
    "has",
    "have",
    "having",
    "he",
    "he'd",
    "he'll",
    "he's",
    "her",
    "here",
    "here's",
    "hers",
    "herself",
    "him",
    "himself",
    "his",
    "how",
    "how's",
    "i",
    "i'd",
    "i'll",
    "i'm",
    "i've",
    "if",
    "in",
    "into",
    "is",
    "it",
    "it's",
    "its",
    "itself",
    "let's",
    "me",
    "more",
    "most",
    "my",
    "myself",
    "no",
    "nor",
    "not",
    "of",
    "or",
    "other",
    "ought",
    "our",
    # These two were previously fused into a single tab-separated literal,
    # so neither word was ever filtered.
    "ours",
    "ourselves",
    "out",
    "own",
    "shan't",
    "she",
    "she'd",
    "she'll",
    "she's",
    "should",
    "so",
    "some",
    "such",
    "than",
    "that",
    "that's",
    "the",
    "their",
    "theirs",
    "them",
    "themselves",
    "then",
    "there",
    "there's",
    "these",
    "they",
    "they'd",
    "they'll",
    "they're",
    "they've",
    "this",
    "those",
    "to",
    "too",
    "up",
    "very",
    "was",
    "wasn't",
    "we",
    "we'd",
    "we'll",
    "we're",
    "we've",
    "were",
    "weren't",
    "what",
    "what's",
    "when",
    "when's",
    "where",
    "where's",
    "which",
    "while",
    "who",
    "who's",
    "whom",
    "why",
    "why's",
    "with",
    "won't",
    "would",
    "you",
    "you'd",
    "you'll",
    "you're",
    "you've",
    "your",
    "yours",
    "yourself",
    "yourselves",
}



def ngram_counter(n: int, texts: list[str], lower: bool = True) -> Counter[str]:
    """Count the n-grams found across a collection of texts.

    Args:
        n: Number of tokens per n-gram.
        texts: Texts to scan.
        lower: When true, each text is lowercased before it is windowed.

    Returns:
        A Counter keyed by the space-joined n-gram.
    """
    counts = Counter()
    for raw in texts:
        normalized = raw.lower() if lower else raw
        # Windows come back as token lists; join them into one hashable key.
        counts.update(' '.join(window) for window in sliding_ngram_windows(normalized, n))
    return counts


def prune_stop_tokens(toks: list[str]) -> list[str]:
    """Return the tokens of ``toks`` that are not stop words.

    Membership in the module-level ``stops`` set is tested on the lowercased
    token; surviving tokens keep their original case and order.
    """
    kept = []
    for tok in toks:
        if tok.lower() in stops:
            continue
        kept.append(tok)
    return kept


def sliding_ngram_windows(s: str, n: int, keep_punct: bool = False) -> list[list[str]]:
    """Return every run of ``n`` consecutive non-stop-word tokens in ``s``.

    Args:
        s: Text to tokenize on whitespace.
        n: Window length in tokens.
        keep_punct: When false (the default), punctuation is stripped before
            the text is split.

    Returns:
        A list of windows, each a list of ``n`` tokens; empty when fewer than
        ``n`` tokens survive stop-word pruning.
    """
    if keep_punct:
        toks = s.split()
    else:
        toks = strip_and_split(s)
    content_toks = prune_stop_tokens(toks)

    # Index of the last position at which a full window still fits.
    last_start = len(content_toks) - n
    windows = []
    for start in range(last_start + 1):
        windows.append(content_toks[start : start + n])
    return windows


def strip_and_split(s: str) -> list[str]:
    """Remove punctuation from ``s`` and split the result on whitespace."""
    cleaned = strip_punct(s)
    return cleaned.split()


def strip_punct(s: str) -> str:
    """Return ``s`` with every character in ``string.punctuation`` removed."""
    return ''.join(ch for ch in s if ch not in string.punctuation)


In [5]:
from sentence_transformers import SentenceTransformer
from sklearn.cluster import AgglomerativeClustering
import nltk
import numpy as np

from sklearn import metrics

# Shared sentence-embedding model ('all-MiniLM-L6-v2'); embed_corpus() encodes text with it.
embedder = SentenceTransformer('all-MiniLM-L6-v2')


def create_sentence_corpus(passages):
    """Flatten passages into one list of sentences.

    Each passage is split with ``nltk.sent_tokenize``; sentences are returned
    in passage order, then in sentence order within each passage.
    """
    return [sent for text in passages for sent in nltk.sent_tokenize(text)]


def embed_corpus(corp):
    """Encode ``corp`` with the module-level ``embedder`` and unit-normalize.

    Returns the embeddings with every row divided by its own L2 norm.
    """
    raw = embedder.encode(corp)
    # keepdims=True keeps a column of norms so the division broadcasts per row.
    row_norms = np.linalg.norm(raw, axis=1, keepdims=True)
    return raw / row_norms


def agg_cluster(sents, embeds, dist_thresh, linkage):
    """Group sentences with agglomerative clustering over cosine distance.

    Args:
        sents: Sentences, positionally aligned with ``embeds``.
        embeds: One embedding vector per sentence.
        dist_thresh: Passed to ``AgglomerativeClustering`` as
            ``distance_threshold``; the number of clusters is left open
            (``n_clusters=None``).
        linkage: Linkage criterion passed through to ``AgglomerativeClustering``.

    Returns:
        ``(clustered_sentences, clustered_embeddings)``: two
        ``defaultdict(list)`` objects keyed by cluster label, holding the
        member sentences and their embeddings in matching order.

    Side effects:
        Prints the cosine silhouette score of the clustering, or a short
        notice when the score is undefined for the label count obtained.
    """
    clustering_model = AgglomerativeClustering(
        n_clusters=None,
        metric='cosine',
        linkage=linkage,
        distance_threshold=dist_thresh,
    )
    clustering_model.fit(embeds)
    cluster_assignment = clustering_model.labels_

    clustered_sentences = defaultdict(list)
    clustered_embeddings = defaultdict(list)
    for sentence_id, cluster_id in enumerate(cluster_assignment):
        clustered_sentences[cluster_id].append(sents[sentence_id])
        clustered_embeddings[cluster_id].append(embeds[sentence_id])

    # silhouette_score raises unless 2 <= n_labels <= n_samples - 1, which
    # happens e.g. when every sentence ends up in its own cluster.
    n_samples = len(cluster_assignment)
    n_labels = len(set(cluster_assignment))
    if 1 < n_labels < n_samples:
        print(metrics.silhouette_score(embeds, cluster_assignment, metric='cosine'))
    else:
        print(f'Silhouette score undefined: {n_labels} cluster(s) for {n_samples} sentence(s)')

    return clustered_sentences, clustered_embeddings

def mean_shift_cluster(sents, embeds):
    """Group sentences with mean-shift clustering.

    Args:
        sents: Sentences, positionally aligned with ``embeds``.
        embeds: One embedding vector per sentence.

    Returns:
        ``(clustered_sentences, clustered_embeddings)``: two
        ``defaultdict(list)`` objects keyed by cluster label, holding the
        member sentences and their embeddings in matching order.

    Side effects:
        Prints the cosine silhouette score of the clustering, or a short
        notice when the score is undefined for the label count obtained.
    """
    from sklearn.cluster import MeanShift

    # NOTE(review): per the scikit-learn docs, cluster_all=False gives orphan
    # points the label -1, so they are all collected under key -1 here.
    clustering_model = MeanShift(cluster_all=False)
    clustering_model.fit(embeds)
    cluster_assignment = clustering_model.labels_

    clustered_sentences = defaultdict(list)
    clustered_embeddings = defaultdict(list)
    for sentence_id, cluster_id in enumerate(cluster_assignment):
        clustered_sentences[cluster_id].append(sents[sentence_id])
        clustered_embeddings[cluster_id].append(embeds[sentence_id])

    # silhouette_score raises unless 2 <= n_labels <= n_samples - 1, which
    # happens e.g. when mean shift finds a single cluster.
    n_samples = len(cluster_assignment)
    n_labels = len(set(cluster_assignment))
    if 1 < n_labels < n_samples:
        print(metrics.silhouette_score(embeds, cluster_assignment, metric='cosine'))
    else:
        print(f'Silhouette score undefined: {n_labels} cluster(s) for {n_samples} sentence(s)')

    return clustered_sentences, clustered_embeddings
    


In [12]:
from sklearn.metrics.pairwise import cosine_similarity
from functools import partial

def avg_cosine_sim(cluster_embeddings):
    """Return the mean pairwise cosine similarity within one cluster.

    Args:
        cluster_embeddings: Sequence of embedding vectors, one per member.

    Returns:
        The average cosine similarity over all ordered pairs of distinct
        members, as a Python float.  Clusters with fewer than two members
        have no pairs and score 0.0.
    """
    n_members = len(cluster_embeddings)
    if n_members < 2:
        return 0.0

    # One call yields the full n x n similarity matrix instead of one
    # scikit-learn call per pair.
    sims = cosine_similarity(np.asarray(cluster_embeddings))
    # Drop the diagonal (each member compared with itself) before averaging.
    off_diagonal_total = sims.sum() - np.trace(sims)
    return float(off_diagonal_total / (n_members * (n_members - 1)))

def score_clusters(clustered_sentences, clustered_embeddings):
    """Attach a cohesion score to every cluster.

    Returns a list of ``(cluster_id, score, sentences)`` tuples, one per key
    of ``clustered_embeddings``, where ``score`` is ``avg_cosine_sim`` of that
    cluster's embeddings.
    """
    return [
        (cluster_id, avg_cosine_sim(embs), clustered_sentences[cluster_id])
        for cluster_id, embs in clustered_embeddings.items()
    ]

def display_scored_clusters(scored_clusters):
    """Print every scored cluster, highest score first.

    Each entry of ``scored_clusters`` is a ``(cluster_id, score, sentences)``
    tuple; the id is shown one-based.
    """
    ranked = sorted(scored_clusters, key=lambda entry: entry[1], reverse=True)
    for cluster_id, score, cluster in ranked:
        print("Cluster ", cluster_id + 1)
        print("Avg cosine sim: ", score)
        print(cluster)
        print("")

def cluster(corpus, embeddings, dist_thresh, display=True, linkage='complete', cluster_method='agg'):
    """Cluster sentences and score each resulting cluster.

    Args:
        corpus: Sentences, positionally aligned with ``embeddings``.
        embeddings: One embedding vector per sentence.
        dist_thresh: Distance threshold; only used when ``cluster_method`` is
            ``'agg'``.
        display: When true, print the scored clusters.
        linkage: Linkage criterion; only used when ``cluster_method`` is
            ``'agg'``.
        cluster_method: ``'agg'`` (agglomerative) or ``'meanshift'``.

    Returns:
        The list of ``(cluster_id, score, sentences)`` tuples produced by
        ``score_clusters``.

    Raises:
        ValueError: If ``cluster_method`` is not one of the supported names.
    """
    if cluster_method == 'agg':
        cluster_func = partial(agg_cluster, dist_thresh=dist_thresh, linkage=linkage)
    elif cluster_method == 'meanshift':
        cluster_func = mean_shift_cluster
    else:
        # Previously this fell through and failed later with
        # "'NoneType' object is not callable".
        raise ValueError(
            f"Unknown cluster_method {cluster_method!r}; expected 'agg' or 'meanshift'"
        )

    clustered_sentences, clustered_embeddings = cluster_func(sents=corpus, embeds=embeddings)
    scored = score_clusters(clustered_sentences, clustered_embeddings)
    if display:
        display_scored_clusters(scored)
    return scored


In [2]:
from pathlib import Path
from transformers import pipeline
from tqdm import tqdm

# Summarization pipeline backed by the 'knkarthick/MEETING_SUMMARY' model; summarize() calls it.
summarizer = pipeline("summarization", model='knkarthick/MEETING_SUMMARY')

def summarize(text, max_length=30):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Args:
        text: The text to summarize.
        max_length: Forwarded to the pipeline as ``max_length``.  The default
            of 30 matches the previously hard-coded value.

    Returns:
        Whatever the pipeline call returns, unmodified.
        NOTE(review): presumably a list holding one dict with a
        'summary_text' entry - confirm before unpacking it downstream.
    """
    return summarizer(text, max_length=max_length)

def create_summaries(scored):
    """Summarize every cluster, best-scoring cluster first.

    Args:
        scored: ``(cluster_id, score, sentences)`` tuples.

    Returns:
        One summary per cluster, ordered by descending score (the same
        ordering ``export_review_file`` uses when it indexes the summaries).
        A cluster whose summarization fails gets a placeholder string.
    """
    summaries = []
    sorted_clusters = sorted(scored, key=lambda tup: tup[1], reverse=True)
    for entry in tqdm(sorted_clusters, desc="Summarizing clusters"):
        cluster_sents = entry[2]
        try:
            summaries.append(summarize(' '.join(cluster_sents)))
        except Exception:
            # Best effort: one failing cluster must not abort the whole run.
            # Catching Exception (rather than everything) still lets
            # KeyboardInterrupt / SystemExit stop a long summarization loop.
            summaries.append('ERROR: was unable to summarize')
    return summaries
    
def export_review_file(fn, num_responses, scored, summaries):
    """Write a plain-text review report of the clusters to ``fn``.

    Args:
        fn: Path of the report file; it is created or overwritten.
        num_responses: Number of responses to the question, echoed in the
            report header.
        scored: ``(cluster_id, score, sentences)`` tuples.
        summaries: One summary per cluster, ordered by descending score, i.e.
            aligned with the order in which the clusters are written.

    Raises:
        IndexError: If ``summaries`` has fewer entries than ``scored``.
    """
    ranked = sorted(scored, key=lambda tup: tup[1], reverse=True)
    more_than_one = [entry for entry in scored if len(entry[2]) > 1]

    # Free-text responses may contain characters outside the platform's
    # default encoding, so the report is always written as UTF-8.
    with open(fn, 'w', encoding='utf-8') as f:
        print(f'Number of responses to the question: {num_responses}', file=f)
        print(f'Number of clusters found: {len(scored)}', file=f)
        print(f'Number of clusters with more than one sentence...: {len(more_than_one)}\n', file=f)
        for i, (cluster_id, score, cluster) in enumerate(ranked):
            print(f"Cluster {cluster_id+1}\n", file=f)
            print(f"Avg cosine similarity of this cluster: {score}", file=f)
            print(f"Number of sentences in this cluster: {len(cluster)}\n", file=f)
            print("Cluster sentences, in no particular order:", file=f)
            for sent in cluster:
                print(f'\t{sent}', file=f)
            summary = summaries[i]
            print(f"\nML Cluster Summary: {summary}", file=f)
            print("\n\n--------------------------------\n\n", file=f)




  from .autonotebook import tqdm as notebook_tqdm


In [25]:
def sentence_level_analysis(question_number, corpus, dist_thresh, linkage, cluster_method='agg', outdir=None):
    """Cluster one question's responses by sentence and export a review file.

    Args:
        question_number: Identifier of the question; used in the file name.
        corpus: The response texts for the question.
        dist_thresh: Distance threshold forwarded to ``cluster``.
        linkage: Linkage criterion forwarded to ``cluster``.
        cluster_method: Clustering method forwarded to ``cluster``.
        outdir: Directory that receives the report.  When omitted, the
            original hard-coded location is used.

    Side effects:
        Creates ``outdir`` if it is missing and writes one report file into it.
    """
    # FIXME: the fallback is a user-specific absolute path kept only for
    # backward compatibility; pass ``outdir`` explicitly on other machines.
    outdir = Path('/Users/sharpr1/data/accessibility') if outdir is None else Path(outdir)
    # Create the directory up front so a bad location fails before the slow
    # embedding / summarization work rather than after it.
    outdir.mkdir(parents=True, exist_ok=True)

    num_responses = len(corpus)
    sentences = create_sentence_corpus(corpus)
    sentence_embeddings = embed_corpus(sentences)

    scored_curr = cluster(sentences, sentence_embeddings, dist_thresh, display=False, linkage=linkage, cluster_method=cluster_method)
    summaries_curr = create_summaries(scored_curr)

    outfile = outdir / f'clusters_for_question_{question_number}_sentence_level_{cluster_method}_{linkage}_{dist_thresh}.txt'
    export_review_file(outfile, num_responses, scored_curr, summaries_curr)

In [10]:
# Show which question numbers received at least one non-empty write-in answer.
question_answers.keys()

dict_keys(['19', '20', '10', '11', '14', '18', '21', '12', '13', '23', '7', '17', '9'])

In [27]:
# Cluster and summarize every question's write-in answers using
# complete-linkage agglomerative clustering with a 0.7 distance threshold.
for question_number, responses in question_answers.items():
    print("Processing responses for question", question_number)
    answer_texts = [response.text for response in responses]
    sentence_level_analysis(question_number, answer_texts, 0.7, 'complete')


Processing responses for question 19
0.12319375


Summarizing clusters:   0%|                                                                                            | 0/50 [00:00<?, ?it/s]Your max_length is set to 30, but you input_length is only 29. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)
Summarizing clusters:  32%|██████████████████████████▌                                                        | 16/50 [00:31<01:04,  1.89s/it]Your max_length is set to 30, but you input_length is only 20. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Summarizing clusters:  44%|████████████████████████████████████▌                                              | 22/50 [00:45<01:06,  2.38s/it]Your max_length is set to 30, but you input_length is only 21. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Summarizing clusters:  50%|█████████████████████████████████████████▌                                         | 25/50 

Processing responses for question 20
0.1191813


Summarizing clusters:   0%|                                                                                            | 0/56 [00:00<?, ?it/s]Your max_length is set to 30, but you input_length is only 24. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)
Summarizing clusters:   2%|█▌                                                                                  | 1/56 [00:01<01:06,  1.22s/it]Your max_length is set to 30, but you input_length is only 20. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Summarizing clusters:   5%|████▌                                                                               | 3/56 [00:03<01:09,  1.30s/it]Your max_length is set to 30, but you input_length is only 22. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)
Summarizing clusters:  21%|█████████████████▊                                                                 | 12/56 

Processing responses for question 10
0.10961497


Summarizing clusters:   7%|██████                                                                              | 3/42 [00:06<01:29,  2.31s/it]Your max_length is set to 30, but you input_length is only 25. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)
Summarizing clusters:  10%|████████                                                                            | 4/42 [00:08<01:14,  1.96s/it]Your max_length is set to 30, but you input_length is only 23. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)
Summarizing clusters:  48%|███████████████████████████████████████▌                                           | 20/42 [00:42<00:43,  1.97s/it]Your max_length is set to 30, but you input_length is only 20. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Summarizing clusters:  57%|███████████████████████████████████████████████▍                                   | 24/42 

Processing responses for question 11
0.14006957


Summarizing clusters:   0%|                                                                                            | 0/55 [00:00<?, ?it/s]Your max_length is set to 30, but you input_length is only 20. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Summarizing clusters:  13%|██████████▋                                                                         | 7/55 [00:12<01:22,  1.71s/it]Your max_length is set to 30, but you input_length is only 25. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)
Summarizing clusters:  18%|███████████████                                                                    | 10/55 [00:16<01:16,  1.69s/it]Your max_length is set to 30, but you input_length is only 29. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)
Summarizing clusters:  24%|███████████████████▌                                                               | 13/55 

Processing responses for question 14
0.12365892


Summarizing clusters:   0%|                                                                                            | 0/81 [00:00<?, ?it/s]Your max_length is set to 30, but you input_length is only 8. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=4)
Summarizing clusters:   1%|█                                                                                   | 1/81 [00:01<01:58,  1.48s/it]Your max_length is set to 30, but you input_length is only 18. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Summarizing clusters:   6%|█████▏                                                                              | 5/81 [00:08<02:24,  1.91s/it]Your max_length is set to 30, but you input_length is only 26. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)
Summarizing clusters:   9%|███████▎                                                                            | 7/81 [00

Summarizing clusters:  98%|████████████████████████████████████████████████████████████████████████████████▉  | 79/81 [02:20<00:02,  1.43s/it]Your max_length is set to 30, but you input_length is only 19. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Summarizing clusters:  99%|█████████████████████████████████████████████████████████████████████████████████▉ | 80/81 [02:21<00:01,  1.34s/it]Your max_length is set to 30, but you input_length is only 18. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Summarizing clusters: 100%|███████████████████████████████████████████████████████████████████████████████████| 81/81 [02:22<00:00,  1.76s/it]


Processing responses for question 18
0.09439687


Summarizing clusters:  50%|█████████████████████████████████████████▌                                         | 12/24 [00:29<00:25,  2.16s/it]Your max_length is set to 30, but you input_length is only 29. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)
Summarizing clusters:  54%|████████████████████████████████████████████▉                                      | 13/24 [00:30<00:20,  1.86s/it]Your max_length is set to 30, but you input_length is only 24. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)
Summarizing clusters:  58%|████████████████████████████████████████████████▍                                  | 14/24 [00:31<00:16,  1.66s/it]Your max_length is set to 30, but you input_length is only 22. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)
Summarizing clusters:  62%|███████████████████████████████████████████████████▉                               | 15/24 

Processing responses for question 21
0.09784919


Summarizing clusters:   0%|                                                                                            | 0/70 [00:00<?, ?it/s]Your max_length is set to 30, but you input_length is only 7. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=3)
Summarizing clusters:   4%|███▌                                                                                | 3/70 [00:04<01:32,  1.39s/it]Your max_length is set to 30, but you input_length is only 24. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)
Summarizing clusters:  13%|██████████▊                                                                         | 9/70 [00:15<01:48,  1.78s/it]Your max_length is set to 30, but you input_length is only 18. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Summarizing clusters:  16%|█████████████                                                                      | 11/70 [00

Processing responses for question 12
0.123674534


Summarizing clusters:  18%|███████████████▍                                                                    | 9/49 [00:16<01:19,  1.98s/it]Your max_length is set to 30, but you input_length is only 19. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Summarizing clusters:  33%|███████████████████████████                                                        | 16/49 [00:31<01:07,  2.03s/it]Your max_length is set to 30, but you input_length is only 6. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=3)
Summarizing clusters:  39%|████████████████████████████████▏                                                  | 19/49 [00:36<00:57,  1.90s/it]Your max_length is set to 30, but you input_length is only 19. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Summarizing clusters:  49%|████████████████████████████████████████▋                                          | 24/49 [00:

Processing responses for question 13
0.1439448


Summarizing clusters:   0%|                                                                                            | 0/51 [00:00<?, ?it/s]Your max_length is set to 30, but you input_length is only 9. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=4)
Summarizing clusters:   2%|█▋                                                                                  | 1/51 [00:01<00:56,  1.12s/it]Your max_length is set to 30, but you input_length is only 25. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)
Summarizing clusters:   6%|████▉                                                                               | 3/51 [00:05<01:43,  2.16s/it]Your max_length is set to 30, but you input_length is only 24. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)
Summarizing clusters:   8%|██████▌                                                                             | 4/51 [0

Summarizing clusters:  98%|█████████████████████████████████████████████████████████████████████████████████▎ | 50/51 [01:20<00:01,  1.26s/it]Your max_length is set to 30, but you input_length is only 9. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=4)
Summarizing clusters: 100%|███████████████████████████████████████████████████████████████████████████████████| 51/51 [01:21<00:00,  1.60s/it]


Processing responses for question 23
0.12271162


Summarizing clusters:   0%|                                                                                            | 0/93 [00:00<?, ?it/s]Your max_length is set to 30, but you input_length is only 15. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=7)
Summarizing clusters:   1%|▉                                                                                   | 1/93 [00:01<01:45,  1.15s/it]Your max_length is set to 30, but you input_length is only 23. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)
Summarizing clusters:   5%|████▌                                                                               | 5/93 [00:07<02:15,  1.54s/it]Your max_length is set to 30, but you input_length is only 29. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)
Summarizing clusters:  11%|████████▉                                                                          | 10/93 [

Summarizing clusters:  89%|██████████████████████████████████████████████████████████████████████████         | 83/93 [02:26<00:13,  1.34s/it]Your max_length is set to 30, but you input_length is only 8. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=4)
Summarizing clusters:  90%|██████████████████████████████████████████████████████████████████████████▉        | 84/93 [02:27<00:11,  1.27s/it]Your max_length is set to 30, but you input_length is only 8. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=4)
Summarizing clusters:  91%|███████████████████████████████████████████████████████████████████████████▊       | 85/93 [02:28<00:09,  1.21s/it]Your max_length is set to 30, but you input_length is only 11. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=5)
Summarizing clusters:  92%|████████████████████████████████████████████████████████████████████████████▊      | 86/93 [02:2

Processing responses for question 7
0.119003326


Summarizing clusters:   0%|                                                                                            | 0/28 [00:00<?, ?it/s]Your max_length is set to 30, but you input_length is only 9. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=4)
Summarizing clusters:   4%|███                                                                                 | 1/28 [00:01<00:36,  1.35s/it]Your max_length is set to 30, but you input_length is only 10. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=5)
Summarizing clusters:  14%|████████████                                                                        | 4/28 [00:06<00:37,  1.58s/it]Your max_length is set to 30, but you input_length is only 13. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=6)
Summarizing clusters:  29%|████████████████████████                                                            | 8/28 [00:

Processing responses for question 17
0.16059878


Summarizing clusters:   8%|███████                                                                             | 1/12 [00:01<00:15,  1.45s/it]Your max_length is set to 30, but you input_length is only 21. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Summarizing clusters:  25%|█████████████████████                                                               | 3/12 [00:04<00:14,  1.56s/it]Your max_length is set to 30, but you input_length is only 28. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)
Summarizing clusters:  33%|████████████████████████████                                                        | 4/12 [00:06<00:12,  1.59s/it]Your max_length is set to 30, but you input_length is only 23. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)
Summarizing clusters:  42%|███████████████████████████████████                                                 | 5/12 

Processing responses for question 9
0.11876916


Summarizing clusters:  13%|███████████▏                                                                        | 2/15 [00:05<00:32,  2.49s/it]Your max_length is set to 30, but you input_length is only 29. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)
Summarizing clusters:  40%|█████████████████████████████████▌                                                  | 6/15 [00:13<00:19,  2.18s/it]Your max_length is set to 30, but you input_length is only 18. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Summarizing clusters:  47%|███████████████████████████████████████▏                                            | 7/15 [00:14<00:14,  1.87s/it]Your max_length is set to 30, but you input_length is only 8. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=4)
Summarizing clusters:  53%|████████████████████████████████████████████▊                                       | 8/15 [00

In [29]:
# Repeat the per-question analysis with a looser 0.9 distance threshold
# (complete-linkage agglomerative clustering).
for question_number, responses in question_answers.items():
    print("Processing responses for question", question_number)
    answer_texts = [response.text for response in responses]
    sentence_level_analysis(question_number, answer_texts, 0.9, 'complete')

Processing responses for question 19
0.11434844


Summarizing clusters:   5%|████▍                                                                               | 1/19 [00:01<00:33,  1.88s/it]Your max_length is set to 30, but you input_length is only 20. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Summarizing clusters:  84%|█████████████████████████████████████████████████████████████████████▉             | 16/19 [00:38<00:06,  2.16s/it]Your max_length is set to 30, but you input_length is only 23. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)
Summarizing clusters:  89%|██████████████████████████████████████████████████████████████████████████▎        | 17/19 [00:40<00:03,  1.97s/it]Your max_length is set to 30, but you input_length is only 23. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)
Summarizing clusters:  95%|██████████████████████████████████████████████████████████████████████████████▋    | 18/19 

Processing responses for question 20
0.07048876


Summarizing clusters:  89%|█████████████████████████████████████████████████████████████████████████▊         | 16/18 [00:36<00:04,  2.05s/it]Your max_length is set to 30, but you input_length is only 15. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=7)
Summarizing clusters: 100%|███████████████████████████████████████████████████████████████████████████████████| 18/18 [00:40<00:00,  2.24s/it]


Processing responses for question 10
0.1052975


Summarizing clusters:   0%|                                                                                            | 0/19 [00:00<?, ?it/s]Your max_length is set to 30, but you input_length is only 23. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)
Summarizing clusters:  79%|█████████████████████████████████████████████████████████████████▌                 | 15/19 [00:34<00:08,  2.18s/it]Your max_length is set to 30, but you input_length is only 26. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)
Summarizing clusters:  84%|█████████████████████████████████████████████████████████████████████▉             | 16/19 [00:35<00:05,  1.89s/it]Your max_length is set to 30, but you input_length is only 16. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=8)
Summarizing clusters:  89%|██████████████████████████████████████████████████████████████████████████▎        | 17/19 [

Processing responses for question 11
0.11137779


Summarizing clusters:  89%|█████████████████████████████████████████████████████████████████████████▊         | 16/18 [00:38<00:03,  1.96s/it]Your max_length is set to 30, but you input_length is only 11. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=5)
Summarizing clusters: 100%|███████████████████████████████████████████████████████████████████████████████████| 18/18 [00:41<00:00,  2.31s/it]


Processing responses for question 14
0.10970743


Summarizing clusters:  39%|████████████████████████████████▌                                                  | 11/28 [00:22<00:28,  1.70s/it]Your max_length is set to 30, but you input_length is only 25. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)
Summarizing clusters:  71%|███████████████████████████████████████████████████████████▎                       | 20/28 [00:45<00:18,  2.28s/it]Your max_length is set to 30, but you input_length is only 27. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)
Summarizing clusters:  79%|█████████████████████████████████████████████████████████████████▏                 | 22/28 [00:48<00:12,  2.14s/it]Your max_length is set to 30, but you input_length is only 14. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=7)
Summarizing clusters:  89%|██████████████████████████████████████████████████████████████████████████         | 25/28 [

Processing responses for question 18
0.1511304


Summarizing clusters:  90%|███████████████████████████████████████████████████████████████████████████▌        | 9/10 [00:16<00:01,  1.86s/it]Your max_length is set to 30, but you input_length is only 18. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Summarizing clusters: 100%|███████████████████████████████████████████████████████████████████████████████████| 10/10 [00:17<00:00,  1.77s/it]


Processing responses for question 21
0.1011382


Summarizing clusters:   0%|                                                                                            | 0/26 [00:00<?, ?it/s]Your max_length is set to 30, but you input_length is only 28. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)
Summarizing clusters:  88%|█████████████████████████████████████████████████████████████████████████▍         | 23/26 [00:44<00:05,  1.69s/it]Your max_length is set to 30, but you input_length is only 29. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)
Summarizing clusters:  92%|████████████████████████████████████████████████████████████████████████████▌      | 24/26 [00:45<00:03,  1.54s/it]Your max_length is set to 30, but you input_length is only 10. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=5)
Summarizing clusters:  96%|███████████████████████████████████████████████████████████████████████████████▊   | 25/26 [

Processing responses for question 12
0.08093963


Summarizing clusters:  11%|████████▊                                                                           | 2/19 [00:05<00:42,  2.52s/it]Your max_length is set to 30, but you input_length is only 21. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Summarizing clusters:  68%|████████████████████████████████████████████████████████▊                          | 13/19 [00:29<00:12,  2.06s/it]Your max_length is set to 30, but you input_length is only 13. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=6)
Summarizing clusters: 100%|███████████████████████████████████████████████████████████████████████████████████| 19/19 [00:39<00:00,  2.06s/it]


Processing responses for question 13
0.11811813


Summarizing clusters:  11%|█████████▎                                                                          | 2/18 [00:04<00:36,  2.28s/it]Your max_length is set to 30, but you input_length is only 21. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Summarizing clusters:  17%|██████████████                                                                      | 3/18 [00:06<00:31,  2.11s/it]Your max_length is set to 30, but you input_length is only 24. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)
Summarizing clusters:  78%|████████████████████████████████████████████████████████████████▌                  | 14/18 [00:29<00:07,  1.88s/it]Your max_length is set to 30, but you input_length is only 27. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)
Summarizing clusters:  94%|██████████████████████████████████████████████████████████████████████████████▍    | 17/18 

Processing responses for question 23
0.10809209


Summarizing clusters:   3%|██▍                                                                                 | 1/35 [00:02<01:24,  2.49s/it]Your max_length is set to 30, but you input_length is only 13. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=6)
Summarizing clusters:  14%|████████████                                                                        | 5/35 [00:10<01:03,  2.11s/it]Your max_length is set to 30, but you input_length is only 27. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)
Summarizing clusters:  60%|█████████████████████████████████████████████████▊                                 | 21/35 [00:51<00:35,  2.55s/it]Your max_length is set to 30, but you input_length is only 10. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=5)
Summarizing clusters:  77%|████████████████████████████████████████████████████████████████                   | 27/35 [0

Processing responses for question 7
0.11450113


Summarizing clusters:   0%|                                                                                            | 0/12 [00:00<?, ?it/s]Your max_length is set to 30, but you input_length is only 27. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)
Summarizing clusters:  17%|██████████████                                                                      | 2/12 [00:03<00:17,  1.72s/it]Your max_length is set to 30, but you input_length is only 19. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Summarizing clusters:  75%|███████████████████████████████████████████████████████████████                     | 9/12 [00:18<00:06,  2.04s/it]Your max_length is set to 30, but you input_length is only 12. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=6)
Summarizing clusters:  92%|████████████████████████████████████████████████████████████████████████████       | 11/12 [0

Processing responses for question 17
0.13288675


Summarizing clusters:   0%|                                                                                             | 0/6 [00:00<?, ?it/s]Your max_length is set to 30, but you input_length is only 9. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=4)
Summarizing clusters:  17%|██████████████▏                                                                      | 1/6 [00:01<00:06,  1.34s/it]Your max_length is set to 30, but you input_length is only 20. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Summarizing clusters:  83%|██████████████████████████████████████████████████████████████████████▊              | 5/6 [00:08<00:01,  1.84s/it]Your max_length is set to 30, but you input_length is only 20. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Summarizing clusters: 100%|█████████████████████████████████████████████████████████████████████████████████████| 6/6 [0

Processing responses for question 9
0.103431344


Summarizing clusters:  50%|██████████████████████████████████████████▌                                          | 3/6 [00:06<00:06,  2.05s/it]Your max_length is set to 30, but you input_length is only 11. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=5)
Summarizing clusters: 100%|█████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:11<00:00,  1.97s/it]
