In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
import config
import sys
import os

# Point config.root_path at the parent of the current working directory and make
# it importable (the `db` and `src` imports further down rely on this entry).
config.root_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# Guard the insert so that re-running this cell does not keep stacking duplicate
# entries at the front of sys.path.
if config.root_path not in sys.path:
    sys.path.insert(0, config.root_path)

In [5]:
from db.dbv2 import Table, AugmentedTable, TrainTestTable
from src.dataset.utils import truncate_by_token, flatten, dedupe_list, truncate_string

### === Prepare Dataset ===

In [6]:
# Evaluation window read by get_data() at call time.
num_sentences = 300  # length of the slice taken from the flattened item/label lists
offset = 500  # start index of that slice within the flattened lists
max_segment_length = 99  # each segment is cut to at most this many rows before flattening

In [7]:
def get_data(dataset_type: str):
    """Load one dataset and return a fixed window of items with boundary labels.

    Every segment returned by ``Table(dataset_type).get_all_segments()`` is
    reduced to the element at index 1 of each of its rows (presumably the
    sentence text -- confirm against ``Table``) and capped at
    ``max_segment_length`` rows. A parallel label list marks the first row of
    each segment with 1 and every other row with 0. Both nested lists are passed
    through ``flatten`` and the slice ``[offset:offset + num_sentences]`` of each
    result is returned.

    The notebook-level globals ``max_segment_length``, ``offset`` and
    ``num_sentences`` are read at call time.

    Args:
        dataset_type: Name handed to ``Table`` to select the dataset.

    Returns:
        A ``(segments_to_test, labels_to_test)`` pair cut with the same slice.
    """
    per_segment_items = []
    per_segment_labels = []
    for segment in Table(dataset_type).get_all_segments():
        items = [row[1] for row in segment]
        # 1 marks the first row of a segment, 0 every following row.
        start_flags = [int(position == 0) for position, _ in enumerate(segment)]
        per_segment_items.append(items[:max_segment_length])
        per_segment_labels.append(start_flags[:max_segment_length])

    flat_items = flatten(per_segment_items)
    flat_labels = flatten(per_segment_labels)

    window = slice(offset, offset + num_sentences)
    return flat_items[window], flat_labels[window]

### === Testing ===

In [8]:
from src.determinor import Determinor
from nltk.metrics.segmentation import pk, windowdiff

Testing Ollama

In [9]:
# Evaluate the Determinor built without any OpenAI flag on the Choi datasets.
# Other dataset groups that have been swapped into the loop header:
#   ["choi_3_5", "choi_3_11", "choi_9_11", "choi_6_8", "city", "disease", "manifesto"]
#   ["committee", "academic", "product"]
#   ["city", "disease"]
#   ["wiki50k"]
#   ["manifesto"]
for dataset_type in ("choi_3_5", "choi_3_11", "choi_9_11", "choi_6_8"):
    segments, labels = get_data(dataset_type)

    print(f"evaluating {dataset_type}")
    determinor = Determinor(max_context_window=7, meeting_dataset=False)
    predictions = determinor.query_batch_data(segments)

    # A prediction equal to True becomes 0, anything else becomes 1
    # (presumably True means "no topic change" -- confirm against Determinor).
    preds = []
    for verdict in predictions:
        preds.append(0 if verdict == True else 1)

    # pk / windowdiff are fed the segmentations as strings of '0'/'1' characters.
    str_labels = ''.join(map(str, labels))
    str_predictions = ''.join(map(str, preds))
    print()
    print(f"L: {str_labels}")
    print(f"P: {str_predictions}")

    # Report both metrics for a range of window sizes k.
    for k in (2, 3, 4, 5, 6, 7, 10, 14, 20):
        pk_score = pk(str_labels, str_predictions, k=k)
        wd_score = windowdiff(str_labels, str_predictions, k=k)
        print(f"k: {k}, pk: {pk_score}, wd: {wd_score}")

Using dataset: choi_3_5
evaluating choi_3_5
..|.|...|.||..|.....|.|...||||.|.|.|...|..|.||....|...|.|.|.|....|..|....|....|..||..|...|...|....|..|..||.|.||....|.|..|.|.|..|....|....|....|....|.||...|..|....|....|..|..|.|..|..|....|...|....|..|...|.|.|..|.|.|..|....|...||||..|..|...|..|...|....|||..|.|..||..|...||.|....|...|....|...||||||...|.
L: 000010001001001000010010001000010010000100100100001000100100100001001000010000100100010001000100001001001001001000010000100010010000100001000010000100100010010000100001001000010010010000100010000100100010001001000100100001000100100100100010010001000010000100001001000010010000100010001000010001000010
P: 001010001011001000001010001111010101000100101100001000101010100001001000010000100110010001000100001001001101011000010100101010010000100001000010000101100010010000100001001001010010010000100010000100100010101001010100100001000111100100100010010001000011100101001100100011010000100010000100011111100010
k: 2, pk: 0.13712374581939799, wd: 0.1872909698

Testing GPT 4o

In [10]:
# Evaluate the Determinor configured with openai_4o=True on the Choi datasets.
for dataset_type in ("choi_3_5", "choi_3_11", "choi_9_11", "choi_6_8"):
    segments, labels = get_data(dataset_type)

    print(f"evaluating {dataset_type}")
    determinor = Determinor(max_context_window=5, meeting_dataset=False, openai_4o=True)
    predictions = determinor.query_batch_data(segments)

    # A prediction equal to True becomes 0, anything else becomes 1
    # (presumably True means "no topic change" -- confirm against Determinor).
    preds = []
    for verdict in predictions:
        preds.append(0 if verdict == True else 1)

    # pk / windowdiff are fed the segmentations as strings of '0'/'1' characters.
    str_labels = ''.join(map(str, labels))
    str_predictions = ''.join(map(str, preds))
    print()
    print(f"L: {str_labels}")
    print(f"P: {str_predictions}")

    # Report both metrics for a range of window sizes k.
    for k in (2, 3, 4, 5, 6, 7, 10, 14, 20):
        pk_score = pk(str_labels, str_predictions, k=k)
        wd_score = windowdiff(str_labels, str_predictions, k=k)
        print(f"k: {k}, pk: {pk_score}, wd: {wd_score}")

Using dataset: choi_3_5
evaluating choi_3_5
....|..||..|..|..|.||.|||.|....|..||...||||.||....|...|.|||.|....|..|....|....|..|...|...|...|....|..|..||.|..|....|....|...|..|....|....|....|....|..|...||.|....|....|..||.|.||.|..|....|...|....|..|...|.||||.|...||.|....|...||||..|..|...|.||...|....||.|.|||..||.|....|..|....|...|...|....|...|....|.
L: 000010001001001000010010001000010010000100100100001000100100100001001000010000100100010001000100001001001001001000010000100010010000100001000010000100100010010000100001001000010010010000100010000100100010001001000100100001000100100100100010010001000010000100001001000010010000100010001000010001000010
P: 000010011001001001011011101000010011000111101100001000101110100001001000010000100100010001000100001001001101001000010000100010010000100001000010000100100011010000100001001101011010010000100010000100100010111101000110100001000111100100100010110001000011010111001101000010010000100010001000010001000010
k: 2, pk: 0.09698996655518395, wd: 0.1705685618

Testing GPT o1

In [11]:
# Evaluate the Determinor configured with openai_o1=True on the Choi datasets.
for dataset_type in ("choi_3_5", "choi_3_11", "choi_9_11", "choi_6_8"):
    segments, labels = get_data(dataset_type)

    print(f"evaluating {dataset_type}")
    determinor = Determinor(max_context_window=5, meeting_dataset=False, openai_o1=True)
    predictions = determinor.query_batch_data(segments)

    # A prediction equal to True becomes 0, anything else becomes 1
    # (presumably True means "no topic change" -- confirm against Determinor).
    preds = []
    for verdict in predictions:
        preds.append(0 if verdict == True else 1)

    # pk / windowdiff are fed the segmentations as strings of '0'/'1' characters.
    str_labels = ''.join(map(str, labels))
    str_predictions = ''.join(map(str, preds))
    print()
    print(f"L: {str_labels}")
    print(f"P: {str_predictions}")

    # Report both metrics for a range of window sizes k.
    for k in (2, 3, 4, 5, 6, 7, 10, 14, 20):
        pk_score = pk(str_labels, str_predictions, k=k)
        wd_score = windowdiff(str_labels, str_predictions, k=k)
        print(f"k: {k}, pk: {pk_score}, wd: {wd_score}")

Using dataset: choi_3_5
evaluating choi_3_5
|...|...|..|..|....||.|||.|....|..|....||||..|....|...|..||.|....|..|....|....|..||..|...|...|....|..|..||.|..|....|.|..|||||..|....|....|....|.|..|.||...|..|.|..|....|..|||..|..|..|....|...|....|..||..|.||||.|...|..|....|...||||..|..|...|..|...|....|....|.|..|..||...|..|.|..|...|...||...|..||....|.
L: 000010001001001000010010001000010010000100100100001000100100100001001000010000100100010001000100001001001001001000010000100010010000100001000010000100100010010000100001001000010010010000100010000100100010001001000100100001000100100100100010010001000010000100001001000010010000100010001000010001000010
P: 100010001001001000011011101000010010000111100100001000100110100001001000010000100110010001000100001001001101001000010100111110010000100001000010100101100010010100100001001110010010010000100010000100110010111101000100100001000111100100100010010001000010000101001001100010010100100010001100010011000010
k: 2, pk: 0.10367892976588629, wd: 0.1672240802

In [32]:
# For each dataset, print how many flattened rows there are per segment-start
# label (integer division), without any per-segment truncation.
for dataset_type in ("choi_3_5", "choi_3_11", "choi_9_11", "choi_6_8"):
    table = Table(dataset_type)

    all_segments = table.get_all_segments()

    # Index 1 of every row, grouped by segment, plus a matching list of flags
    # that is 1 for the first row of a segment and 0 for the others.
    segments = [[row[1] for row in segment] for segment in all_segments]
    segments_labels = [
        [int(position == 0) for position, _ in enumerate(segment)]
        for segment in all_segments
    ]
    flattened_segments = flatten(segments)
    flattened_labels = flatten(segments_labels)

    total_rows = len(flattened_labels)
    segment_starts = flattened_labels.count(1)
    print(total_rows // segment_starts)

Using dataset: qmsum_committee
54
Using dataset: qmsum_academic
267
Using dataset: qmsum_product
136
