In [33]:
# Run if working locally
%load_ext autoreload
%autoreload 2
%load_ext nb_black

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

In [34]:
import sqlite3
from sqlite3 import Error
import pickle
import os, sys
import config

config.root_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.insert(0, config.root_path)

from src.dataset.dataset import RawData
from src.dataset.wikisection_preprocessing import (
    tokenize,
    clean_sentence,
    preprocess_text_segmentation,
    format_data_for_db_insertion,
)
from src.dataset.utils import truncate_by_token
from db.dbv2 import Table, AugmentedTable, TrainTestTable
import pprint


from utils.metrics import windowdiff, pk

from src.bertkeywords.src.similarities import Embedding, Similarities
from src.bertkeywords.src.keywords import Keywords
from src.encoders.coherence_v2 import Coherence
from src.dataset.utils import flatten, dedupe_list, truncate_string
from src.experimentation.coherence_v2 import SimpleExperiment, CoherenceExperiment

<IPython.core.display.Javascript object>

In [37]:
experimentation = SimpleExperiment()

# constants
NUM_SAMPLES = 2000
#         MODEL_STRING="bert-base-uncased",
#         MODEL_STRING="sentence-transformers/LaBSE",
MODEL_STRING = "roberta-base"
# MODEL_STRING = "sentence-transformers/all-MiniLM-L6-v2"
GRAPHS = True
METRICS_SUMMARY = False
PREDICTIONS_SUMMARY = True

# ----------------------------------------------------

experimentation.queue_experiment(
    CoherenceExperiment(
        dataset_type="city",
        model_string=MODEL_STRING,
        max_words_per_step=6,
        start=1000,
        num_samples=NUM_SAMPLES,
        same_word_multiplier=1,
        no_same_word_penalty=1,
        kb_embeddings=True,
        coherence_dump_on_prediction=False,
        coherence_threshold=0.3,
        prediction_threshold=0.47,
        pruning=1,
        pruning_min=7,
        batch_size=10,
        print_metrics_summary=METRICS_SUMMARY,
        print_predictions_summary=PREDICTIONS_SUMMARY,
        show_graphs=GRAPHS,
        keyword_diversity=0.3,
        diverse_keywords=True,
        similar_keywords=True,
    )
)


experimentation.queue_experiment(
    CoherenceExperiment(
        dataset_type="disease",
        model_string=MODEL_STRING,
        max_words_per_step=6,
        start=1000,
        num_samples=NUM_SAMPLES,
        same_word_multiplier=1,
        no_same_word_penalty=1,
        kb_embeddings=True,
        coherence_dump_on_prediction=False,
        coherence_threshold=0.3,
        prediction_threshold=0.47,
        pruning=1,
        pruning_min=7,
        batch_size=10,
        print_metrics_summary=METRICS_SUMMARY,
        print_predictions_summary=PREDICTIONS_SUMMARY,
        show_graphs=GRAPHS,
        keyword_diversity=0.3,
        diverse_keywords=True,
        similar_keywords=True,
    )
)


# experimentation.queue_experiment(
#     CoherenceExperiment(
#         dataset_type="city",
#         model_string=MODEL_STRING,
#         max_words_per_step=4,
#         start=1000,
#         num_samples=NUM_SAMPLES,
#         same_word_multiplier=1,
#         no_same_word_penalty=1,
#         kb_embeddings=True,
#         coherence_dump_on_prediction=False,
#         coherence_threshold=0.3,
#         prediction_threshold=0.47,
#         pruning=1,
#         pruning_min=7,
#         batch_size=10,
#         print_metrics_summary=METRICS_SUMMARY,
#         print_predictions_summary=PREDICTIONS_SUMMARY,
#         show_graphs=GRAPHS,
#         keyword_diversity=0.3,
#         diverse_keywords=False,
#         similar_keywords=True,
#     )
# )

# experimentation.queue_experiment(
#     CoherenceExperiment(
#         dataset_type="city",
#         model_string=MODEL_STRING,
#         max_words_per_step=4,
#         start=1000,
#         num_samples=NUM_SAMPLES,
#         same_word_multiplier=1,
#         no_same_word_penalty=1,
#         kb_embeddings=True,
#         coherence_dump_on_prediction=False,
#         coherence_threshold=0.3,
#         prediction_threshold=0.47,
#         pruning=1,
#         pruning_min=7,
#         batch_size=10,
#         print_metrics_summary=METRICS_SUMMARY,
#         print_predictions_summary=PREDICTIONS_SUMMARY,
#         show_graphs=GRAPHS,
#         keyword_diversity=0.3,
#         diverse_keywords=True,
#         similar_keywords=False,
#     )
# )

<IPython.core.display.Javascript object>

In [None]:
experimentation.run()

Running experiment set: Q33X6
Running experiment: CoherenceExperiment(num_samples=2000, start=1000, dataset_type='city', model_string='roberta-base', max_words_per_step=6, same_word_multiplier=1, no_same_word_penalty=1, prediction_threshold=0.47, coherence_threshold=0.3, coherence_dump_on_prediction=False, pruning=1, pruning_min=7, dynamic_threshold=False, threshold_warmup=10, last_n_threshold=5, kb_embeddings=True, experiment_hash='TOC14', batch_size=10, keyword_diversity=0.3, diverse_keywords=True, similar_keywords=True, print_metrics_summary=False, print_predictions_summary=True, show_graphs=True)


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at /Users/amitmaraj/.cache/torch/sentence_transformers/bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.

TOTAL BATCHES: 200

......... 1 .......... 2 .......... 3 .......... 4 .......... 5 

### =========================================================

### Testing proximity metrics

In [71]:
# fmt: off
predictions=[1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
real=[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
# fmt: on

<IPython.core.display.Javascript object>

In [72]:
def check_proximity(predictions, real, proximity=2):
    exact_matches = 0
    very_close_matches = 0  # within 1 step
    close_matches = 0  # within 2 steps

    for i, (p, r) in enumerate(zip(predictions, real)):
        # take care of all the cases where we're at the
        # beginning of the arrays
        prev_idx = i - 1
        prev_prev_idx = i - 2
        if i == 0 or i == 1:
            prev_idx = 0
            prev_prev_idx = 0

        prev_prediction = predictions[prev_idx]
        prev_prev_prediction = predictions[prev_prev_idx]

        # take care of all the cases where we're at the
        # end of the arrays
        next_idx = i + 1
        next_next_idx = i + 2
        if i == (len(predictions) - 1):
            next_idx = i
            next_next_idx = i
        if i == (len(predictions) - 2):
            next_idx = i + 1
            next_next_idx = i + 1

        next_prediction = predictions[next_idx]
        next_next_prediction = predictions[next_next_idx]

        if r == 1 and p == 1:
            exact_matches += 1
            continue
        if r == 1:
            if prev_prediction == 1 or next_prediction == 1:
                very_close_matches += 1
                continue
            if prev_prev_prediction == 1 or next_next_prediction == 1:
                close_matches += 1
                continue
            continue

    return exact_matches, very_close_matches, close_matches

<IPython.core.display.Javascript object>

In [73]:
exact_matches, close_matches, very_close_matches = check_proximity(predictions, real)

<IPython.core.display.Javascript object>

In [74]:
exact_matches, close_matches, very_close_matches, predictions.count(1), real.count(1)

(11, 0, 1, 15, 39)

<IPython.core.display.Javascript object>

In [75]:
num_positive_predictions = predictions.count(1)
num_real_positives = real.count(1)

<IPython.core.display.Javascript object>

In [76]:
print("Predictions ----->")
print(
    f"Percentage of positive predictions made that are within proximity of 2: {100*close_matches/num_positive_predictions}"
)
print(
    f"Percentage of positive predictions made that are within proximity of 1: {100*very_close_matches/num_positive_predictions}"
)
print(
    f"Percentage of positive predictions made that are exact matches: {100*exact_matches/num_positive_predictions}"
)
print("Real ----->")
print(
    f"Percentage of real predictions made that are within proximity of 2: {100*close_matches/num_real_positives}"
)
print(
    f"Percentage of real predictions made that are within proximity of 1: {100*very_close_matches/num_real_positives}"
)
print(
    f"Percentage of real predictions made that are exact matches: {100*exact_matches/num_real_positives}"
)

Predictions ----->
Percentage of positive predictions made that are within proximity of 2: 0.0
Percentage of positive predictions made that are within proximity of 1: 6.666666666666667
Percentage of positive predictions made that are exact matches: 73.33333333333333
Real ----->
Percentage of positive predictions made that are within proximity of 2: 0.0
Percentage of positive predictions made that are within proximity of 1: 2.5641025641025643
Percentage of positive predictions made that are exact matches: 28.205128205128204


<IPython.core.display.Javascript object>

In [82]:
proximity = (
    (1 / 3 * (close_matches / num_real_positives))
    + (1 / 2 * (very_close_matches / num_real_positives))
    + (exact_matches / num_real_positives)
) / (
    (1 / 3 * (close_matches / num_positive_predictions))
    + (1 / 2 * (very_close_matches / num_positive_predictions))
    + (exact_matches / num_positive_predictions)
)

<IPython.core.display.Javascript object>

In [84]:
proximity

0.38461538461538464

<IPython.core.display.Javascript object>

In [90]:
numerator = (
    (1 / 3 * (close_matches / num_real_positives))
    + (1 / 2 * (very_close_matches / num_real_positives))
    + (exact_matches / num_real_positives)
)

denominator = (
    (1 / 3 * (close_matches / num_positive_predictions))
    + (1 / 2 * (very_close_matches / num_positive_predictions))
    + (exact_matches / num_positive_predictions)
)

num_real_positives, num_positive_predictions, numerator, denominator

(39, 15, 0.2948717948717949, 0.7666666666666666)

<IPython.core.display.Javascript object>

In [92]:
numerator * denominator, denominator / numerator

(0.22606837606837607, 2.5999999999999996)

<IPython.core.display.Javascript object>