In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import config
import sys
import os

# Treat the parent of the current working directory as the project root: store it on
# `config` and put it first on sys.path so later imports are resolved against it first.
# Note that `config` itself is imported before this path is added, so it has to be
# importable from the kernel's default path.
config.root_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.insert(0, config.root_path)

In [3]:
from db.dbv2 import Table, AugmentedTable, TrainTestTable
from src.dataset.utils import truncate_by_token, flatten, dedupe_list, truncate_string

### === Prepare Dataset ===

In [4]:
# Evaluation window read by get_data() at call time.
# Length of the window: get_data returns at most this many flattened items.
num_sentences = 200
# Start index of the window within the flattened item list.
offset = 500
# Per-segment cap: only the first `max_segment_length` items of each segment are kept.
max_segment_length = 99

In [5]:
def get_data(dataset_type: str):
    """Return a fixed window of items and boundary labels for one dataset.

    Every segment returned by ``Table(dataset_type).get_all_segments()`` is
    reduced to the element at index 1 of each of its entries (presumably the
    sentence text -- TODO confirm against ``Table``) and capped at
    ``max_segment_length`` entries. A parallel label list marks the first entry
    of each segment with 1 and every other entry with 0. Both nested lists are
    passed through ``flatten`` and sliced to ``[offset, offset + num_sentences)``.

    The notebook-level ``max_segment_length``, ``offset`` and ``num_sentences``
    are read when the function is called, not when it is defined.

    Args:
        dataset_type: Dataset name handed to ``Table``.

    Returns:
        A ``(items, labels)`` pair covering the same window. Because the window
        is taken by slicing, the pair can hold fewer than ``num_sentences``
        entries when there is not enough data past ``offset``.
    """
    all_segments = Table(dataset_type).get_all_segments()

    segments = []
    segments_labels = []
    for segment in all_segments:
        # Build the complete per-segment lists first and truncate afterwards.
        texts = [entry[1] for entry in segment]
        starts = [int(position == 0) for position, _ in enumerate(segment)]
        segments.append(texts[:max_segment_length])
        segments_labels.append(starts[:max_segment_length])

    window = slice(offset, offset + num_sentences)
    return flatten(segments)[window], flatten(segments_labels)[window]

### === Testing ===

In [6]:
from src.determinor import Determinor
from nltk.metrics.segmentation import pk, windowdiff

Testing Ollama

In [11]:
# Evaluate Determinor with no backend flag (the section heading labels this run as
# Ollama) on the three meeting datasets.
# Alternative dataset lists kept for reference; swap them into the loop below:
#   choi_3_5, choi_3_11, choi_9_11, choi_6_8 | city, disease | wiki50k | manifesto
for dataset_type in ("committee", "academic", "product"):
    segments, labels = get_data(dataset_type)

    print(f"evaluating {dataset_type}")
    determinor = Determinor(max_context_window=5, meeting_dataset=True)
    predictions = determinor.query_batch_data(segments)

    # A prediction equal to True becomes 0, anything else becomes 1.
    # The `== True` comparison is kept on purpose: the prediction type is not visible
    # here, and a plain truthiness test could map other values differently.
    preds = [int(not (p == True)) for p in predictions]

    # pk / windowdiff are fed the sequences as strings of '0' / '1' characters.
    str_labels = ''.join(map(str, labels))
    str_predictions = ''.join(map(str, preds))
    print()
    print(f"L: {str_labels}")
    print(f"P: {str_predictions}")

    # Report both metrics for a range of window sizes.
    for k in (2, 3, 4, 5, 6, 7, 10, 14, 20):
        print(
            f"k: {k}, "
            f"pk: {pk(str_labels, str_predictions, k=k)}, "
            f"wd: {windowdiff(str_labels, str_predictions, k=k)}"
        )

Using dataset: qmsum_committee
evaluating committee
.............................................................................|.......|..................................................................................................................
L: 00000000000000000000000000000000000000000100000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000010000000000000000000000000000
P: 00000000000000000000000000000000000000000000000000000000000000000000000000000100000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
k: 2, pk: 0.06030150753768844, wd: 0.06030150753768844
k: 3, pk: 0.09090909090909091, wd: 0.09090909090909091
k: 4, pk: 0.1218274111675127, wd: 0.1218274111675127
k: 5, pk: 0.15306122448979592, wd: 0.15306122448979592
k: 6, pk: 0.18461538461538463, wd: 0.18461538461538463
k: 7, pk: 0.21649484536082475, wd: 0.21649484536082475
k: 10, pk: 

Testing DeepSeek

In [33]:
# Evaluate Determinor with deepseek=True on the three meeting datasets.
for dataset_type in ("committee", "academic", "product"):
    segments, labels = get_data(dataset_type)

    print(f"evaluating {dataset_type}")
    determinor = Determinor(max_context_window=5, meeting_dataset=True, deepseek=True)
    predictions = determinor.query_batch_data(segments)

    # A prediction equal to True becomes 0, anything else becomes 1.
    # The `== True` comparison is kept on purpose: the prediction type is not visible
    # here, and a plain truthiness test could map other values differently.
    preds = [int(not (p == True)) for p in predictions]

    # pk / windowdiff are fed the sequences as strings of '0' / '1' characters.
    str_labels = ''.join(map(str, labels))
    str_predictions = ''.join(map(str, preds))
    print()
    print(f"L: {str_labels}")
    print(f"P: {str_predictions}")

    # Report both metrics for a range of window sizes.
    for k in (2, 3, 4, 5, 6, 7, 10, 14, 20):
        print(
            f"k: {k}, "
            f"pk: {pk(str_labels, str_predictions, k=k)}, "
            f"wd: {windowdiff(str_labels, str_predictions, k=k)}"
        )

Using dataset: qmsum_committee
evaluating committee
........|.......||................

KeyboardInterrupt: 

Testing GPT 4o

In [12]:
# Evaluate Determinor with openai_4o=True on the three meeting datasets.
for dataset_type in ("committee", "academic", "product"):
    segments, labels = get_data(dataset_type)

    print(f"evaluating {dataset_type}")
    determinor = Determinor(max_context_window=5, meeting_dataset=True, openai_4o=True)
    predictions = determinor.query_batch_data(segments)

    # A prediction equal to True becomes 0, anything else becomes 1.
    # The `== True` comparison is kept on purpose: the prediction type is not visible
    # here, and a plain truthiness test could map other values differently.
    preds = [int(not (p == True)) for p in predictions]

    # pk / windowdiff are fed the sequences as strings of '0' / '1' characters.
    str_labels = ''.join(map(str, labels))
    str_predictions = ''.join(map(str, preds))
    print()
    print(f"L: {str_labels}")
    print(f"P: {str_predictions}")

    # Report both metrics for a range of window sizes.
    for k in (2, 3, 4, 5, 6, 7, 10, 14, 20):
        print(
            f"k: {k}, "
            f"pk: {pk(str_labels, str_predictions, k=k)}, "
            f"wd: {windowdiff(str_labels, str_predictions, k=k)}"
        )

Using dataset: qmsum_committee
evaluating committee
.........................................|..||............|..|....................................................................................||.......................|............................
L: 00000000000000000000000000000000000000000100000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000010000000000000000000000000000
P: 00000000000000000000000000000000000000000100110000000000001001000000000000000000000000000000000000000000000000000000000000000000000000000000000000110000000000000000000000010000000000000000000000000000
k: 2, pk: 0.05025125628140704, wd: 0.05025125628140704
k: 3, pk: 0.0707070707070707, wd: 0.0707070707070707
k: 4, pk: 0.08121827411167512, wd: 0.09137055837563451
k: 5, pk: 0.09183673469387756, wd: 0.11224489795918367
k: 6, pk: 0.10256410256410256, wd: 0.13333333333333333
k: 7, pk: 0.1134020618556701, wd: 0.15463917525773196
k: 10, pk: 0

Testing GPT o1

In [14]:
# Evaluate Determinor with openai_o1=True on the three meeting datasets.
for dataset_type in ("committee", "academic", "product"):
    segments, labels = get_data(dataset_type)

    print(f"evaluating {dataset_type}")
    determinor = Determinor(max_context_window=5, meeting_dataset=True, openai_o1=True)
    predictions = determinor.query_batch_data(segments)

    # A prediction equal to True becomes 0, anything else becomes 1.
    # The `== True` comparison is kept on purpose: the prediction type is not visible
    # here, and a plain truthiness test could map other values differently.
    preds = [int(not (p == True)) for p in predictions]

    # pk / windowdiff are fed the sequences as strings of '0' / '1' characters.
    str_labels = ''.join(map(str, labels))
    str_predictions = ''.join(map(str, preds))
    print()
    print(f"L: {str_labels}")
    print(f"P: {str_predictions}")

    # Report both metrics for a range of window sizes.
    for k in (2, 3, 4, 5, 6, 7, 10, 14, 20):
        print(
            f"k: {k}, "
            f"pk: {pk(str_labels, str_predictions, k=k)}, "
            f"wd: {windowdiff(str_labels, str_predictions, k=k)}"
        )

Using dataset: qmsum_committee
evaluating committee
.....|....|.............||...................|........|......|.......................|.........................|.............|....................||........||..|..........|..................|.........
L: 00000000000000000000000000000000000000000100000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000010000000000000000000000000000
P: 00000100001000000000000011000000000000000000010000000010000001000000000000000000000001000000000000000000000000010000000000000100000000000000000000110000000011001000000000010000000000000000001000000000
k: 2, pk: 0.135678391959799, wd: 0.135678391959799
k: 3, pk: 0.19696969696969696, wd: 0.19696969696969696
k: 4, pk: 0.25380710659898476, wd: 0.25888324873096447
k: 5, pk: 0.3010204081632653, wd: 0.3112244897959184
k: 6, pk: 0.3435897435897436, wd: 0.358974358974359
k: 7, pk: 0.38144329896907214, wd: 0.4020618556701031
k: 10, pk: 0.471204

In [32]:
# For each meeting dataset, print the floor of (total items / number of segment starts),
# i.e. the average segment length over the whole, untruncated dataset.
# NOTE(review): presumably used to choose a sensible k for pk / windowdiff -- confirm.
for dataset_type in ["committee", "academic", "product"]:
    table = Table(dataset_type)

    all_segments = table.get_all_segments()

    # Same item / label construction as get_data(), but without the per-segment cap
    # and without the offset window.
    segments = [[y[1] for y in x] for x in all_segments]
    segments_labels = [
        [1 if i == 0 else 0 for i, _ in enumerate(x)] for x in all_segments
    ]
    flattened_segments = flatten(segments)
    flattened_labels = flatten(segments_labels)

    # Every non-empty segment contributes exactly one label equal to 1.
    segment_count = flattened_labels.count(1)
    if segment_count == 0:
        # Without this guard the floor division below raises ZeroDivisionError and
        # aborts the loop before the remaining datasets are reported.
        print(f"{dataset_type}: no segments found, skipping")
        continue

    print(len(flattened_labels) // segment_count)

Using dataset: qmsum_committee
54
Using dataset: qmsum_academic
267
Using dataset: qmsum_product
136
