In [None]:
!pip install -q ruptures alibi-detect langchain-huggingface sentence-transformers
import sys, pathlib

import matplotlib.pyplot as plt
import pandas as pd
import torch

from vectors import vectors
from cosine_sim import BiweeklyCosineDissimilarity
from cumulative_cosine_sim import BiweeklyCumulativeDissimilarity
from mmd_calculation import BiweeklyMMD
from change_point_detection import ChangePointDetector
import utils

In [None]:
# Data preparation

# Tunable inputs for this cell.
path_to_dataset = 'fakerecogna_abstrativo.xlsx'
START_DATE = '2020-01-01'
END_DATE = '2021-12-31'
# Use the GPU when one is available; otherwise fall back to CPU so the
# notebook still runs (more slowly) on machines without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): FakeRecogna2's return value is discarded and load() takes no
# arguments, so presumably FakeRecogna2 prepares the data that load() then
# reads back -- confirm against utils.
utils.FakeRecogna2(path_to_dataset)
df = utils.load()

# Embedding helper; the keyword arguments are handed to `vectors` unchanged.
emb = vectors(
    model_name="neuralmind/bert-large-portuguese-cased",
    model_kwargs={"device": DEVICE},
    encode_kwargs={"normalize_embeddings": False},
)

# Keep only rows whose date lies in [START_DATE, END_DATE], both ends inclusive.
# NOTE(review): if `date` carries a time-of-day, rows later than midnight on
# END_DATE are excluded by this comparison -- confirm the column dtype.
in_period = df["date"].between(START_DATE, END_DATE)

# .copy() detaches the selection from the full frame, so a column assignment
# inside attach() cannot trigger pandas' chained-assignment warning.
df = df.loc[in_period].copy()

# NOTE(review): attach() is assumed to add the "embeddings" column that the
# later cells pass as emb_col -- confirm in the vectors module.
df = emb.attach(df, text_col="text")

In [None]:
# Cosine dissimilarity over the embedded frame, configured with freq="2W"

cos_params = {
    "df": df,
    "date_col": "date",
    "emb_col": "embeddings",
    "freq": "2W",
}
cos = BiweeklyCosineDissimilarity(**cos_params)

# Kept as the last expression of the cell so its return value is displayed.
cos.compute()

In [None]:
# Cumulative cosine dissimilarity, same frame and freq="2W" as the cell above

cum_cos_params = {
    "df": df,
    "date_col": "date",
    "emb_col": "embeddings",
    "freq": "2W",
}
cum_cos = BiweeklyCumulativeDissimilarity(**cum_cos_params)

# Kept as the last expression of the cell so its return value is displayed.
cum_cos.compute()

In [None]:
# Change point detection: build the detector only.
# NOTE(review): this cell uses freq="D" while the other cells use "2W", and
# nothing is computed here -- presumably a later cell drives `cpd`; confirm.

cpd_params = {
    "df": df,
    "date_col": "date",
    "emb_col": "embeddings",
    "freq": "D",
    "model": "rbf",
    "algo": "binseg",
}
cpd = ChangePointDetector(**cpd_params)

In [None]:
# MMD over the embedded frame, configured with freq="2W"
# NOTE(review): backend and p_val are presumably forwarded to the underlying
# drift detector -- confirm in mmd_calculation.

mmd_params = {
    "df": df,
    "date_col": "date",
    "emb_col": "embeddings",
    "freq": "2W",
    "backend": "pytorch",
    "p_val": 0.05,
}
mmd = BiweeklyMMD(**mmd_params)

# Kept as the last expression of the cell so its return value is displayed.
mmd.compute()