# Inference on Speeches and Tweets

**Author:** [Giuseppe Tripodi](https://www.linkedin.com/in/giuseppe-tripodi-unical/)<br>
**Date created:** 2022/11/12<br>
**Description:** Run model inference on a test set of speeches and tweets, then evaluate and plot the predictions

# Setup

## Install packages

In [None]:
!pip install datasets transformers
!pip install sentencepiece
!pip install sacremoses
!pip install nltk
!pip install transformers
!pip install evaluate
!pip install wandb

## Import Libraries

In [None]:
import json
import os
import csv
import re
import wandb
import transformers
from transformers import AutoConfig, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback
from datasets import load_dataset, load_metric
from transformers import AutoTokenizer
from sklearn import preprocessing
import numpy as np
import evaluate
from transformers.integrations import TensorBoardCallback
import transformers
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import load_dataset, load_metric
from transformers import AutoTokenizer
from transformers import Pipeline, TextClassificationPipeline
import numpy as np
from datasets import load_dataset, load_metric
import pandas as pd
import torch
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from math import pi
from datetime import date
from sklearn.metrics import plot_confusion_matrix
import seaborn as sns

## Set up Weights & Biases and general variables

In [None]:
# Date stamp in "%b-%d-%Y" form (e.g. "Nov-12-2022"); it is embedded in the
# file names of the plots saved by this notebook.
today = date.today().strftime("%b-%d-%Y")

In [None]:
#os.environ["WANDB_DISABLED"] = "true"

In [None]:
%env WANDB_PROJECT=
%env WANDB_LOG_MODEL=
%env WANDB_API_KEY=

In [None]:
wandb.login()

## Support Functions


In [None]:
def softmax(outputs):
    """
    Numerically stable softmax over the last axis.

    :param outputs: array-like of raw scores (logits)
    :return: array of the same shape whose last axis sums to 1
    """
    # Subtracting the per-row maximum keeps np.exp from overflowing and does
    # not change the result.
    peak = np.max(outputs, axis=-1, keepdims=True)
    exponentials = np.exp(outputs - peak)
    normaliser = exponentials.sum(axis=-1, keepdims=True)
    return exponentials / normaliser

### TextClassification Pipeline

In [None]:
class MyTextClassificationPipeline(TextClassificationPipeline):
    """
    Custom text classification pipeline.

    For every input text it returns the predicted label, the index of the
    predicted class, the softmax probability of that class and the raw logits.
    """

    def _sanitize_parameters(self, **kwargs):
        """
        Checks the parameters passed. Returns three dict of kwargs
        that will be passed to preprocess, _forward and postprocess.
        This pipeline supports no extra parameter, so all kwargs are ignored.
        :param kwargs: keyword arguments received by the pipeline (unused)
        :return: three empty dicts (preprocess, _forward, postprocess kwargs)
        """
        return {}, {}, {}

    def preprocess(self, inputs):
        """
        Takes the input and turn it into something feedable to the model
        :param inputs: the text to classify
        :return: the tokenizer output, as tensors of the pipeline framework
        """
        # Truncation has to be requested when the tokenizer is called; without
        # it a text longer than the model's maximum length is passed on whole.
        return self.tokenizer(inputs, truncation=True, return_tensors=self.framework)

    def _forward(self, model_inputs):
        """
        Forward step
        :param model_inputs: the output of preprocess
        :return: the raw model output
        """
        return self.model(**model_inputs)

    def postprocess(self, model_outputs):
        """
        Turns the forward step output into the final output
        :param model_outputs: the output of _forward; only the first row of
            its logits is read
        :return: dict with the keys "label", "best_class_code", "score" and "logits"
        """
        # detach()/cpu() are no-ops for a tensor that is already a detached
        # CPU tensor, and make the conversion to numpy safe otherwise.
        logits = model_outputs.logits[0].detach().cpu().numpy()
        probabilities = softmax(logits)

        # Plain Python int: usable as a dictionary key and serialisable,
        # unlike the NumPy integer returned by argmax.
        best_class = int(np.argmax(probabilities))
        label = self.model.config.id2label[best_class]
        score = probabilities[best_class].item()
        logits = logits.tolist()
        return {"label": label, "best_class_code": best_class, "score": score, "logits": logits}

### Compute Metrics

In [None]:
PLOT_PATH = "./"

class ComputeMetrics:
    """
    A class used to compute metrics on model output and plot the results.
    ...

    Methods
    ---------
    compute_metrics()
        prints accuracy, F1, precision, recall and, when it can be computed, ROC AUC

    plot_consistency_for_politician()
        plots, for every class, the correct predictions next to the number of samples of that class

    confusion_matrix_plot()
        plots the confusion matrix

    misclassification_pie_chart()
        draws, for every class, a pie chart of the classes its samples were wrongly assigned to


    """

    def __init__(self, model_predictions, model_inputs, mapping, tc2=False, tags=""):
        """
        :param model_predictions: list of dict
            output of the TextClassificationPipeline; the "label", "best_class_code" and "logits" entries are read
        :param model_inputs: DataFrame
            model input; its "label" column holds the true labels
        :param mapping: dict
            mapping between label and associated id, used to map input labels to ids used by models
        :param tc2: boolean
            This is true if you perform text classification on election programs. Input labels are different in text classification of election programs.
        :param tags: str
            free text inserted in the file names of the saved plots
        """
        self.model_predictions = model_predictions
        self.model_inputs = model_inputs
        self.mapping = mapping
        self.tags = tags
        self.tc2 = tc2

        # DEFINE Y_PRED AND Y_TRUE
        self.references_labels = self.model_inputs["label"].map(mapping).tolist()  # y_true
        predictions = pd.DataFrame(self.model_predictions)
        if self.tc2:
            # If tc2, the predicted politician is translated into the id of the
            # party, because "CarloCalenda" and "MatteoRenzi" have the same
            # label on the test set.
            mapping_prediction_label = {
                "CarloCalenda": self.mapping["TerzoPolo"],
                "EnricoLetta": self.mapping["PD"],
                "GiorgiaMeloni": self.mapping["FratelliDItalia"],
                "GiuseppeConte": self.mapping["Movimento5Stelle"],
                "MatteoRenzi": self.mapping["TerzoPolo"],
                "MatteoSalvini": self.mapping["Lega"],
                "SilvioBerlusconi": self.mapping["ForzaItalia"],
            }
            self.predictions_labels = predictions["label"].map(mapping_prediction_label).tolist()  # y_pred
        else:
            self.predictions_labels = predictions["best_class_code"].tolist()  # y_pred

    def compute_metrics(self):
        """
        Prints the values of: Accuracy, F1, precision, recall and ROC AUC.

        F1, precision and recall are weighted averages over the classes.
        ROC AUC is best effort: when it cannot be computed the reason is
        printed and the method returns normally.
        """
        # load the different metrics; the averaging strategy is chosen when
        # compute() is called, not when the metric is loaded
        accuracy = evaluate.load('accuracy')
        f1 = evaluate.load('f1')
        precision = evaluate.load('precision')
        recall = evaluate.load('recall')
        roc_auc_score = evaluate.load("roc_auc", "multiclass")

        # print metrics
        print(accuracy.compute(predictions=self.predictions_labels, references=self.references_labels))
        for metric in (f1, precision, recall):
            print(metric.compute(predictions=self.predictions_labels, references=self.references_labels, average='weighted'))

        # ROC AUC
        try:
            # one row of class probabilities per sample
            logits = np.asarray(pd.DataFrame(self.model_predictions)["logits"].tolist())
            pred_scores = softmax(logits)
            print(roc_auc_score.compute(references=self.references_labels,
                                        prediction_scores=pred_scores.tolist(),
                                        multi_class='ovr',
                                        # one label per score column
                                        labels=list(range(pred_scores.shape[1]))))
        except Exception as err:
            # e.g. a class missing from the references: report it instead of
            # silently skipping the metric
            print(f"ROC AUC could not be computed: {err}")

    def plot_consistency_for_politician(self):
        """
        Plots a bar plot of the correct predictions and of the number of
        samples for every class, saves it under PLOT_PATH and prints the
        per-class ratio between the two.
        """
        n_classes = len(self.mapping)
        # compute the confusion matrix (rows: true class, columns: predicted class)
        matrix = confusion_matrix(self.references_labels, self.predictions_labels, labels=np.arange(n_classes))
        # takes only the TP
        diagonal = matrix.diagonal()
        # row sums: number of samples whose true label is each class
        tot_ele = matrix.sum(axis=1)

        # plot the results
        politician = list(self.mapping.keys())
        X_axis = np.arange(n_classes)

        plt.figure(figsize=(10, 5))
        # creating the bar plot
        plt.bar(X_axis - 0.2, diagonal, color="maroon", width=0.4, label="Correct predictions")
        plt.bar(X_axis + 0.2, tot_ele, color="#E5BABA", width=0.4, label="Total number of predictions")

        plt.xticks(X_axis, politician)
        plt.xlabel("Italian Politician")
        plt.ylabel("number of predictions")
        plt.title("Italian Politician Accuracy", fontsize=12)
        plt.legend()
        plt.savefig(f"{PLOT_PATH}/accuracy_for_politician_{'tc2' if self.tc2 else 'tc1'}_{self.tags}_{today}.png")

        # print the percentage
        for name, correct, total in zip(politician, diagonal, tot_ele):
            # a class without samples has no ratio: print nan instead of dividing by zero
            ratio = correct / total if total else float("nan")
            print(f"Politico: {name}")
            print(f"predizioni corrette:{correct}\npredizioni totali: {total}")
            print(f"Accuracy: {ratio}")
            print("\n")

    def confusion_matrix_plot(self):
        """
        Plots the confusion matrix and saves it under PLOT_PATH
        """
        disp = ConfusionMatrixDisplay.from_predictions(y_true=self.references_labels, y_pred=self.predictions_labels,
                                                       labels=np.arange(len(self.mapping)),
                                                       display_labels=list(self.mapping.keys()), cmap=plt.cm.Reds)
        fig = disp.ax_.get_figure()
        disp.ax_.tick_params(axis='x', which='major', labelsize=13)
        disp.ax_.tick_params(axis='y', which='major', labelsize=13)
        fig.set_figwidth(15)
        fig.set_figheight(10)
        plt.xticks(rotation=30)
        plt.title("Confusion Matrix", fontsize=14)
        plt.savefig(f"{PLOT_PATH}/confusion_matrix_{'tc2' if self.tc2 else 'tc1'}_{self.tags}_{today}.png")

    def misclassification_pie_chart(self):
        """
        Draws one pie chart per class showing which classes its misclassified
        samples were assigned to, and saves the figure under PLOT_PATH.
        """
        y_pred = np.array(self.predictions_labels)
        y_true = np.array(self.references_labels)
        politician_names = list(self.mapping.keys())
        n_classes = len(politician_names)

        # takes only the misclassified element
        wrong = y_pred != y_true
        matrix = confusion_matrix(y_true[wrong], y_pred[wrong], labels=np.arange(n_classes))

        colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, n_classes))
        # two pies per row; never fewer than the 4 rows used for seven classes,
        # more rows (and a proportionally taller figure) when there are more classes
        n_rows = max(4, (n_classes + 1) // 2)
        fig, axs = plt.subplots(nrows=n_rows, ncols=2, figsize=(15, 15 * n_rows / 4))
        fig.tight_layout()
        for idx, ax in enumerate(axs.ravel()):
            if idx >= n_classes:
                # unused cell of the grid
                fig.delaxes(ax)
                continue
            row = matrix[idx]
            ax.set_title(politician_names[idx], fontsize=15)
            if row.sum() == 0:
                # nothing was misclassified for this class; an all-zero pie
                # cannot be normalised, so show a note instead
                ax.text(0.5, 0.5, "No misclassifications", ha="center", va="center", transform=ax.transAxes)
                ax.axis("off")
                continue
            ax.pie(row, colors=colors,
                   labels=[name if count != 0 else None for name, count in zip(politician_names, row)])
        plt.savefig(f"{PLOT_PATH}/politician_misclassification_{'tc2' if self.tc2 else 'tc1'}_{self.tags}_{today}.png")



In [None]:
def define_structure_for_line_plots(dataset_input, eval_predict) -> pd.DataFrame:
    """
    creates the dataframe that will be used to generate all line graphs
    :param dataset_input: DataFrame with the test set; it must contain the
        engagement counters summed into "impression" and is not modified
    :param eval_predict: pipeline output, one dict per row of dataset_input
    :return: dataframe with the kept input columns ("label" renamed to
        "original_label"), the "impression" column and the prediction
        columns ("label" renamed to "assigned_label", plus "score")
    """
    engagement_columns = ["viewCount", "likeCount", "commentCount", "retweet_count", "reply_count", "quote_count"]
    kept_columns = ["video_id", "created_at", "text", "label", "tweet_id", "impression"]

    # prediction dataframe
    df = pd.DataFrame(eval_predict).drop(columns=["logits", "best_class_code"])
    df = df.rename(columns={"label": "assigned_label"})

    # input dataframe
    df_input = dataset_input.copy()
    df_input["impression"] = df_input[engagement_columns].sum(axis=1)
    # keyword form: the positional axis argument of drop() is not accepted by
    # recent pandas versions
    df_input = df_input.drop(columns=df_input.columns.difference(kept_columns))
    df_input = df_input.rename(columns={"label": "original_label"})

    # Predictions are positional (one per input row) while concat aligns on
    # the index: reuse the input index so a filtered or re-indexed input does
    # not end up with misaligned rows.
    if len(df) == len(df_input):
        df.index = df_input.index

    # concat the dataframe
    return pd.concat([df_input, df], axis=1)

In [None]:
def line_plot_correlation_score_impression(df: pd.DataFrame):
    """
    Plots score against (min-max normalised) impression for tweets and for
    speeches, and saves the 2x2 figure under PLOT_PATH.

    Top row: tweets (rows whose "tweet_id" is not 0). Bottom row: speeches
    (rows whose "video_id" is not "0"), averaged per video and label.
    :param df: dataframe with the columns "impression", "score", "tweet_id",
        "video_id" and "original_label"; it is not modified
    """
    fontsize = 12
    _fig, ax = plt.subplots(2, 2, figsize=(20, 10))

    # min-max normalisation of the impression column, on a copy of the input
    column = 'impression'
    df = df.copy()
    lowest = df[column].min()
    spread = df[column].max() - lowest
    if spread:
        df[column] = (df[column] - lowest) / spread
    else:
        # all impressions are equal: avoid the 0/0 division
        df[column] = 0.0

    # TWEETS PLOTTING
    df_tweets = df.loc[df["tweet_id"] != 0]
    ax[0][0].set_title("Tweets score impression correlation", fontdict={"fontsize": fontsize})
    sns.histplot(data=df_tweets, x="impression", y="score", cbar=True, bins=30, ax=ax[0][0])

    ax[0][1].set_title("Tweets impression Distributions", fontdict={"fontsize": fontsize})
    sns.kdeplot(data=df_tweets, x="impression", weights="score", hue="original_label", ax=ax[0][1])

    # SPEECH PLOTTING
    df_speech = df.loc[df["video_id"] != "0"]
    # list selection: selecting several columns of a groupby with a bare
    # tuple is not accepted by recent pandas versions; reset_index turns the
    # group keys back into regular columns for seaborn
    df_speech = df_speech.groupby(["video_id", "original_label"])[["score", "impression"]].mean().reset_index()
    ax[1][0].set_title("Speech score impression correlation", fontdict={"fontsize": fontsize})
    sns.histplot(data=df_speech, x="impression", y="score", cbar=True, bins=30, ax=ax[1][0])

    ax[1][1].set_title("Speech impression Distributions", fontdict={"fontsize": fontsize})
    sns.kdeplot(data=df_speech, x="impression", weights="score", hue="original_label", ax=ax[1][1])
    plt.savefig(f"{PLOT_PATH}/score_impression_correlation_{today}.png")

In [None]:
def line_plot_correlation_score_period(df: pd.DataFrame):
    """
    Plots the mean score per month and label for tweets and for speeches,
    and saves the 2x2 figure under PLOT_PATH.

    Top row: tweets (rows whose "tweet_id" is not 0). Bottom row: speeches
    (rows whose "video_id" is not "0").
    :param df: dataframe with the columns "created_at" (parsed with the
        format "%Y-%m-%d %H:%M:%S+00:00"), "score", "impression", "tweet_id",
        "video_id" and "original_label"; it is not modified
    """
    # work on a copy so the caller's "created_at" column keeps its type
    df = df.copy()
    df["created_at"] = pd.to_datetime(df["created_at"], format="%Y-%m-%d %H:%M:%S+00:00")
    fontsize = 12
    fig, ax = plt.subplots(2, 2, figsize=(20, 10))

    def mean_by_month_and_label(subset):
        """Mean score and impression per (month name, original label)."""
        month = subset["created_at"].dt.month_name().rename("month")
        # list selection: selecting several columns of a groupby with a bare
        # tuple is not accepted by recent pandas versions; reset_index turns
        # the group keys back into regular columns for seaborn
        return subset.groupby([month, "original_label"])[["score", "impression"]].mean().reset_index()

    # TWEETS PLOTTING
    df_tweets = mean_by_month_and_label(df.loc[df["tweet_id"] != 0])

    ax[0][0].tick_params(labelrotation=25)
    ax[0][0].set_title("Tweets score period correlation", fontdict={"fontsize": fontsize})
    sns.histplot(data=df_tweets, x="month", y="score", cbar=True, bins=30, ax=ax[0][0])

    ax[0][1].tick_params(labelrotation=25)
    ax[0][1].set_title("Tweets impression Distributions", fontdict={"fontsize": fontsize})
    hist = sns.histplot(data=df_tweets, x="month", weights="score", multiple="dodge", shrink=1, binwidth=5, hue="original_label", ax=ax[0][1])
    sns.move_legend(hist, bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)

    # SPEECH PLOTTING
    df_speech = mean_by_month_and_label(df.loc[df["video_id"] != "0"])

    ax[1][0].tick_params(labelrotation=15)
    ax[1][0].set_title("Speech score period correlation", fontdict={"fontsize": fontsize})
    sns.histplot(data=df_speech, x="month", y="score", cbar=True, bins=30, ax=ax[1][0])

    ax[1][1].tick_params(labelrotation=15)
    ax[1][1].set_title("Speech impression Distributions", fontdict={"fontsize": fontsize})
    hist_s = sns.histplot(data=df_speech, x="month", weights="score", multiple="dodge", shrink=1, binwidth=5, hue="original_label", ax=ax[1][1])
    sns.move_legend(hist_s, bbox_to_anchor=(1.02, 0.5), loc='upper left', borderaxespad=0)
    fig.tight_layout(pad=2.0)
    plt.savefig(f"{PLOT_PATH}/score_period_correlation_{today}.png")

# Inference 

## Read test set

In [None]:
# Notebook-wide settings
HOME = "../input/models"
NUM_LABELS = 7

# Label -> class id for text classification 1: each politician gets the
# position of their name in this list.
mapping = {
    politician: class_id
    for class_id, politician in enumerate([
        "CarloCalenda",
        "EnricoLetta",
        "GiorgiaMeloni",
        "GiuseppeConte",
        "MatteoRenzi",
        "MatteoSalvini",
        "SilvioBerlusconi",
    ])
}

# Test set used for inference
data = pd.read_csv("../input/text-classification-1/it/test_set.csv")

## Load models

In [None]:
# Start a Weights & Biases run and declare the model artifact (version v0)
# that this notebook consumes.
run = wandb.init()
artifact = run.use_artifact('giusetrip98/ItalianPoliticianConsistency/gilberto_tc1_new_speech_and_tweets:v0', type='model')

# Download the artifact; artifact_dir is later passed to from_pretrained as
# the folder to load the model from.
artifact_dir = artifact.download()

# Hub checkpoint the tokenizer is loaded from. The commented-out names are
# alternative checkpoints.
# NOTE(review): presumably this must be the base checkpoint of the model
# stored in the artifact above - confirm when switching artifacts.
#checkpoint = "Musixmatch/umberto-wikipedia-uncased-v1"
#checkpoint = "m-polignano-uniba/bert_uncased_L-12_H-768_A-12_italian_alb3rt0"
checkpoint = "idb-ita/gilberto-uncased-from-camembert"

In [None]:
# Load Hugging Face model from that folder using the same model class
model = AutoModelForSequenceClassification.from_pretrained(artifact_dir, num_labels=NUM_LABELS)
# The tokenizer is loaded from the checkpoint name, not from the artifact folder.
# NOTE(review): padding/truncation are normally arguments of the tokenizer
# call; passed to from_pretrained they presumably have no effect on how the
# texts are encoded later - confirm.
tokenizer = AutoTokenizer.from_pretrained(checkpoint, padding=True, truncation=True)
# Wrap model and tokenizer in the custom pipeline class defined in this notebook.
my_pipeline = MyTextClassificationPipeline(model=model, tokenizer=tokenizer)


## Compute Metrics and Plotting

In [None]:
# predict: run the pipeline on every text of the test set. The pipeline's
# postprocess step builds, for each text, a dict with the keys "label",
# "best_class_code", "score" and "logits".
eval_predict = my_pipeline(data["text"].tolist())
#print(eval_predict)

In [None]:
# Metric computation: the predictions are compared with the "label" column of
# the test set, translated to ids through `mapping`. With tc2=False the
# predicted class index ("best_class_code") is used directly as y_pred.
cm = ComputeMetrics(eval_predict, data, mapping, tc2=False)
cm.compute_metrics()

### Consistency for politician

In [None]:
cm.plot_consistency_for_politician()

### Confusion Matrix

In [None]:
matrix = cm.confusion_matrix_plot()

### Misclassification Pie Chart

In [None]:
cm.misclassification_pie_chart()

### Correlation Score and Impression

In [None]:
df = define_structure_for_line_plots(data, eval_predict)

In [None]:
# compute accuracy for type (speeches vs tweets) and politician
df_video = df[df["video_id"] != '0']
df_tweets = df[df["tweet_id"] != 0]

for pol in df["original_label"].unique():
    print(f"Pol: {pol}")
    # Masks are built on the frame they filter and combined with &, instead
    # of chaining two selections with a mask that belongs to another frame.
    video_is_pol = df_video['original_label'] == pol
    TP_video = int((video_is_pol & (df_video['assigned_label'] == pol)).sum())
    tot_video = int(video_is_pol.sum())

    tweet_is_pol = df_tweets['original_label'] == pol
    TP_tweets = int((tweet_is_pol & (df_tweets['assigned_label'] == pol)).sum())
    tot_tweets = int(tweet_is_pol.sum())

    # A politician may have no speeches or no tweets in the test set: report
    # nan for that medium instead of raising ZeroDivisionError.
    acc_video = TP_video / tot_video if tot_video else float("nan")
    acc_tweets = TP_tweets / tot_tweets if tot_tweets else float("nan")

    print(f"tot_tp = {TP_tweets + TP_video}")
    print(f"tot = {tot_video + tot_tweets}")
    print(f"Acc video: {acc_video}")
    print(f"Acc tweets: {acc_tweets}")
    print()

In [None]:
# compute average score of misclassification for speeches
misclassified = df["original_label"] != df["assigned_label"]
df_mis_speeches = df[misclassified & (df["video_id"] != '0')]
grouped_single_speeches = df_mis_speeches.groupby('original_label').agg({'score': ['mean', 'max', 'min', 'count']})

print(grouped_single_speeches)
print()

# Share of each politician's misclassifications that come from speeches.
# The tweet counts are computed here, so this cell does not depend on a
# variable created by a later cell.
speech_mis_counts = grouped_single_speeches["score"]["count"]
tweet_mis_counts = df[misclassified & (df["tweet_id"] != 0)].groupby('original_label')["score"].count().rename("count")
# fill_value=0: a politician misclassified in only one medium still gets a
# share (0 or 1) instead of NaN
total_mis_counts = speech_mis_counts.add(tweet_mis_counts, fill_value=0)
print(speech_mis_counts.reindex(total_mis_counts.index, fill_value=0) / total_mis_counts)

In [None]:
# compute average score of misclassification for tweets
df_mis_tweets = df[(df["original_label"] != df["assigned_label"]) & (df["tweet_id"] != 0)]
grouped_single_tweets = df_mis_tweets.groupby('original_label').agg({'score': ['mean', 'max', 'min', 'count']})

print(grouped_single_tweets)
print()

# Share of each politician's misclassifications that come from tweets.
tweet_mis_counts = grouped_single_tweets["score"]["count"]
speech_mis_counts = grouped_single_speeches["score"]["count"]
# fill_value=0: a politician misclassified in only one medium still gets a
# share (0 or 1) instead of NaN
total_mis_counts = tweet_mis_counts.add(speech_mis_counts, fill_value=0)
print(tweet_mis_counts.reindex(total_mis_counts.index, fill_value=0) / total_mis_counts)

In [None]:
line_plot_correlation_score_impression(df)

### Correlation Score and Time

In [None]:
line_plot_correlation_score_period(df)