In [1]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from transformers import pipeline
import tqdm

from utils import config, utils

In [2]:
df, df_cv = utils.Utils.load_data()

In [3]:
pipe = pipeline("text-classification", model="ProsusAI/finbert")

RuntimeError: Failed to import transformers.models.bert.modeling_tf_bert because of the following error (look up to see its traceback):
Your currently installed version of Keras is Keras 3, but this is not yet supported in Transformers. Please install the backwards-compatible tf-keras package with `pip install tf-keras`.

In [None]:
y_pred = []

for idx, row in tqdm.tqdm(df.iterrows(), desc="Predicting sentiment"):
    try:
        result = pipe(
            {"text": row["text"]},
            truncation=True  
        )
        y_pred.append(result)
    except Exception as e:
        print(idx, "|", e, "|", row["text"])
        y_pred.append({"label": "N/A"})
    
df["y_pred"] = y_pred

In [None]:
df.head()

In [None]:
df["pred_sentiment"] = df["y_pred"].apply(lambda x: x["label"])
df["pred_sentiment"] = df["pred_sentiment"].replace({"POS": "positive", "NEU": "neutral", "NEG": "negative"})
df.head()

In [None]:
cm = confusion_matrix(df["label"], df["pred_sentiment"], labels=config.SENTIMENTS, normalize="true")
print(cm)

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=config.SENTIMENTS)
disp.plot(cmap=plt.cm.Blues)