In [1]:
from model_io import ModelIO
from predictions_evaluator import PredictionsEvaluator

model_io = ModelIO(verbose=True)


In [2]:
bert = model_io.load_model("bert_base_uncased")
deberta_base = model_io.load_model("deberta_v3_base")
deberta_small = model_io.load_model("deberta_v3_small")
roberta = model_io.load_model("roberta_base")


Loading model from ./models/bert_base_uncased...
Loading model from ./models/deberta_v3_base...
Loading model from ./models/deberta_v3_small...
Loading model from ./models/roberta_base...


In [3]:
from dataframes_loader import DataFramesLoader
import pandas as pd


def transform_two_answers_per_row_to_one_answer_per_row(df):
    dfs = list()
    for i in range(1, 3):
        columns = ["comment_text", f"answer{i}_text", f"answer{i}_label"]
        map_columns = {
            f"answer{i}_text": "answer_text",
            f"answer{i}_label": "answer_label",
        }
        dfs.append(df[columns].rename(columns=map_columns))

    return pd.concat(dfs).reset_index(drop=True)


def get_test_df(preloaded: bool, testing_instances: int) -> pd.DataFrame:
    df_loader = DataFramesLoader()
    _, test_df = df_loader.get_datasets(preloaded=preloaded)
    test_df = test_df[:testing_instances]
    test_df = transform_two_answers_per_row_to_one_answer_per_row(test_df)
    return test_df


def get_X_y(df: pd.DataFrame):
    X = df[["comment_text", "answer_text"]].values
    X = list(map(tuple, X))

    y = df["answer_label"].values

    return X, y


In [4]:
import os

PREDICTIONS_PATH = "predictions"

bert_pred = pd.read_pickle(
    os.path.join(PREDICTIONS_PATH, "bert_base_uncased.pkl")
)
deberta_small_pred = pd.read_pickle(
    os.path.join(PREDICTIONS_PATH, "deberta_v3_small.pkl")
)
deberta_base_pred = pd.read_pickle(
    os.path.join(PREDICTIONS_PATH, "deberta_v3_base.pkl")
)
roberta_pred = pd.read_pickle(
    os.path.join(PREDICTIONS_PATH, "roberta_base.pkl")
)


In [5]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB


def get_bow_pred():
    df_loader = DataFramesLoader()
    train_df, test_df = df_loader.get_datasets()

    texts = (
        train_df["comment_text"].values
        + train_df["answer1_text"].values
        + train_df["answer2_text"].values
    )  # type: ignore

    def concat_commend_and_answer(
        df: pd.DataFrame, answer_number: int
    ) -> pd.Series:
        return df["comment_text"] + " | " + df[f"answer{answer_number}_text"]

    train_texts1 = concat_commend_and_answer(train_df, 1)
    train_texts2 = concat_commend_and_answer(train_df, 2)
    train_texts = pd.concat([train_texts1, train_texts2], ignore_index=True)
    train_labels = pd.concat(
        [train_df["answer1_label"], train_df["answer2_label"]],
        ignore_index=True,
    )

    test_texts1 = concat_commend_and_answer(test_df, 1)
    test_texts2 = concat_commend_and_answer(test_df, 2)
    test_texts = pd.concat([test_texts1, test_texts2], ignore_index=True)
    test_labels = pd.concat(
        [test_df["answer1_label"], test_df["answer2_label"]], ignore_index=True
    )

    vectorizer = CountVectorizer(stop_words="english")
    model = MultinomialNB()

    vectorizer = vectorizer.fit(texts)
    model.fit(vectorizer.transform(train_texts), train_labels)
    bow_pred = model.predict(vectorizer.transform(test_texts))
    return bow_pred


bow_pred = get_bow_pred()


In [6]:
test_df = get_test_df(True, 200000)
y_true = test_df["answer_label"].values


In [7]:
bert_mistakes = y_true != bert_pred
roberta_mistakes = y_true != roberta_pred
deberta_small_mistakes = y_true != deberta_small_pred
deberta_base_mistakes = y_true != deberta_base_pred
bow_mistakes = y_true != bow_pred


## Find where BoW misses and transformers get it right


In [8]:
bow_mistakes_df = test_df[bow_mistakes]
bow_mistakes_index = bow_mistakes_df.index


In [9]:
transformers_agreement = (
    ~bert_mistakes & ~roberta_mistakes
) & ~deberta_base_mistakes

agreement_index = test_df[transformers_agreement].index


In [10]:
df = test_df.iloc[agreement_index.intersection(bow_mistakes_index), :]
df


Unnamed: 0,comment_text,answer_text,answer_label
4,Who else thinks that javascript alert() is an ...,It's a useful debugging tool,1
12,Cops Bust Into Gizmodo Editor's Home And Seize...,"/Libertarian/: Where the police state is fine,...",0
14,"Wisconsin GOP caught red handed, looking to ru...",And we're upset since the Democrats would *nev...,1
23,I've honestly never come across a dumber human...,"TIL, voluntary slavery by starving people who'...",1
24,Sexual assault under broad daylight in China.,CPR fail.,0
...,...,...,...
54963,Literally every 15 seconds right now (9.18 CT),I thought this was going to be a post about th...,0
54967,"$499.99 Black Series 6"" scale AT-ACT will be a...",It's what the fans want!,1
54970,Some further attic rummaging and I came across...,Steiner!,0
54978,Previously unreleased FBI photo's of 9/11 Pent...,But it was a cruise missile!,1


In [11]:
print(f"{len(df) / len(test_df)*100:.2f}%")


18.09%


We see that in $18\%$ of the test data set, the transformers agreed with a
particular answer that was right and the Bag-of-Words got that answer wrong.

Let's see some examples.


In [12]:
def print_line(loc: int) -> None:
    print(f"comment: '{df.loc[loc]['comment_text']}'")
    print(f"answer: '{df.loc[loc]['answer_text']}'")
    print(f"label: '{df.loc[loc]['answer_label']}'")


In [13]:
print_line(4)


comment: 'Who else thinks that javascript alert() is an annoying, lazy, and ugly way to notify me of something on your site.'
answer: 'It's a useful debugging tool'
label: '1'


In [14]:
print_line(19765)


comment: 'Pilot 'congratulates' passengers for drinking all alcohol on plane'
answer: 'good for the pilot because that means less alcohol to weigh down the plane during landing'
label: '1'


In [15]:
print_line(29469)


comment: 'After being down 4-1, the Ducks climb all the way back to win in overtime 5-4, taking a 3-0 series lead.'
answer: 'SUBSCRIBE'
label: '0'


In [16]:
print_line(28884)


comment: '"I'm 'X' and I'm not offended!"'
answer: 'Even if the majority of people are offended by something, that in itself does not make that behavior unacceptable.'
label: '0'


In [17]:
df["answer_label"].value_counts(normalize=True)


0    0.533072
1    0.466928
Name: answer_label, dtype: float64

## Find where DeBERTa gets it right and RoBERTa and BERT don't


In [18]:
deberta_right_but_others_dont = (
    bert_mistakes & roberta_mistakes
) & ~deberta_base_mistakes

df = test_df[deberta_right_but_others_dont]
df


Unnamed: 0,comment_text,answer_text,answer_label
40,Athens Riots--Greek General Strike Day 2 in Pi...,"Is it me, or are there a lot of photos showing...",1
41,"Starting today, 47 million women will have acc...",WHORES!,1
46,"Romney's Economic Plan Would Kill 360,000 Jobs...","Well, yeah, but his plan would funnel money to...",0
59,The Smoking Gun identifies the anti-gay Cheeri...,"""WHEN WILL THIS ANTI-CHRISTIAN BIGOTRY END????""",1
89,Apparently my face is the envy of astronomers ...,"He forgot to say ""no homo"".",1
...,...,...,...
54951,Christopher Nolan and Sofia Coppola urge fans ...,Are they gonna get us a babysitter?,1
54957,The Pegassi Collection,How is the classic was thinking of getting it !,0
54977,The Secrit Play of the Minnesota Timberwolves,Snitch!,1
54979,PETA has sent a letter to Nintendo about it's ...,I think the real shocker here is the implicati...,1


In [19]:
print(f"{len(df) / len(test_df)*100:.2f}%")


3.39%


In [20]:
print_line(5591)


comment: 'Nearly one in three separated UK mothers think fathers should have no say in their children's lives'
answer: 'I should thank God every day that my ex got an abortion.'
label: '0'


In [21]:
print_line(15406)


comment: 'BREAKING: Hurricane Matthew to bypass North Carolina due to HB2'
answer: 'Guys stop... it wasnt funny the first 80 times the joke was made....'
label: '0'


In [22]:
print_line(22691)


comment: 'Not the best way of asking someone out, dude.'
answer: 'He said "a little"; he knows better than to ask for too much right off the bat.'
label: '1'


In [23]:
print_line(54951)


comment: 'Christopher Nolan and Sofia Coppola urge fans to watch films in cinemas, not on streaming services.'
answer: 'Are they gonna get us a babysitter?'
label: '1'


In [24]:
df["answer_label"].value_counts(normalize=True)


1    0.535121
0    0.464879
Name: answer_label, dtype: float64

## Find where deberta_small didn't get it right, but base did


In [25]:
deberta_base_got_right = (deberta_small_mistakes) & ~deberta_base_mistakes

df = test_df[deberta_base_got_right]
df


Unnamed: 0,comment_text,answer_text,answer_label
11,Bruce Eckel is Wrong - Every time the subject ...,public interface SomeService { void provideSer...,1
13,How to get out of your rut.,Congratulations- you'll be homeless before you...,0
43,Naruto 596 on MangaPanda,"In the next 10 chapters, Kakashi reveals he ha...",1
46,"Romney's Economic Plan Would Kill 360,000 Jobs...","Well, yeah, but his plan would funnel money to...",0
79,Moscow bans gay pride marches for the next cen...,Lucky russians,0
...,...,...,...
54951,Christopher Nolan and Sofia Coppola urge fans ...,Are they gonna get us a babysitter?,1
54968,"Is it just me, or does the botched Ronaldo sta...",What do you mean botched?,1
54975,Being fit is a fascist aesthetic,The concentration camps where just a governmen...,1
54977,The Secrit Play of the Minnesota Timberwolves,Snitch!,1


In [26]:
df.sample(4)


Unnamed: 0,comment_text,answer_text,answer_label
26271,The Tampa Jewish community has raised nearly $...,but not the muslims,0
33925,UFC owners in advanced talks to sell business ...,Dana is denying reports of the sale via ESPN,0
43288,Why won't someone think of the children?,More dead babies=More dead baby jokes It's wor...,0
29042,Jeff just referenced Dinoflask in a BAFTA inte...,"This is a great Dinoflask reference and all, b...",1


In [27]:
print(f"{len(df) / len(test_df)*100:.2f}%")


7.12%


In [28]:
df["answer_label"].value_counts(normalize=True)

0    0.590595
1    0.409405
Name: answer_label, dtype: float64

In [29]:
print_line(19493)


comment: '2 years of upgrades'
answer: 'Look at the last picture I think you left some shit on the desk there in the left side'
label: '0'


In [30]:
print_line(51762)


comment: 'Cop Block founder Adeemo Freeman arrested on drug trafficking charges'
answer: 'hahahahahahahahahahahahahahahahahaha'
label: '0'


In [31]:
print_line(54968)


comment: 'Is it just me, or does the botched Ronaldo statue look more like someone more local?'
answer: 'What do you mean botched?'
label: '1'


In [32]:
print_line(54975)


comment: 'Being fit is a fascist aesthetic'
answer: 'The concentration camps where just a government weight lost program gone awry'
label: '1'


In [33]:
df_loader = DataFramesLoader()
train_df, test_df = df_loader.get_datasets(preloaded=True)

print(f'{len(train_df)=} {len(test_df)=}')

len(train_df)=109181 len(test_df)=27496


In [34]:
train_df, test_df = df_loader.get_datasets(preloaded=True)
train_df = transform_two_answers_per_row_to_one_answer_per_row(train_df)
test_df = transform_two_answers_per_row_to_one_answer_per_row(test_df)

print(f'{len(train_df)=} {len(test_df)=}')

len(train_df)=218362 len(test_df)=54992


In [35]:
train_df, test_df = df_loader.get_datasets(preloaded=False)
train_df = transform_two_answers_per_row_to_one_answer_per_row(train_df)
test_df = transform_two_answers_per_row_to_one_answer_per_row(test_df)

print(f'{len(train_df)=} {len(test_df)=}')

len(train_df)=20000 len(test_df)=20000
