In [1]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import pandas as pd

model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl").to("cuda")
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.10it/s]
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
def preprocess(dataset):
    dataset.rename(columns={'sentiment': 'Sentiment', 'review': 'Text'}, inplace=True)
    dataset['Sentiment'] = dataset['Sentiment'].replace({'negative': 0, 'positive': 1})
    dataset['Sentiment'] = dataset['Sentiment'].replace({'Negative': 0, 'Positive': 1})
    dataset = dataset.sample(frac=1, random_state=1).reset_index(drop=True)


In [3]:
original = pd.read_csv("data/test_original.tsv", sep='\t')
preprocess(original)

  dataset['Sentiment'] = dataset['Sentiment'].replace({'Negative': 0, 'Positive': 1})


In [4]:
contrast = pd.read_csv("data/test_contrast.tsv", sep='\t')
preprocess(contrast)

  dataset['Sentiment'] = dataset['Sentiment'].replace({'Negative': 0, 'Positive': 1})


In [5]:
def inference(*, prompt, sentence, tokenizer, model):
    inputs = tokenizer(prompt(sentence),  padding='max_length', return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=50)
    result = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    
    if "negative" in result.lower():
        return 0
    elif "positive" in result.lower():
        return 1
    else:
        return 2

In [6]:
import random


def build_example(dataset, index, caps, spaced):
    string = ""
    if(caps == "all"):
        string = "TEXT: " + dataset.Text[index] + "\nSENTIMENT: " + ["NEGATIVE","POSITIVE"][dataset.Sentiment[index]] + "\n\n"
    elif(caps == "key"):
        string = "TEXT: " + dataset.Text[index] + "\nSENTIMENT: " + ["negative","positive"][dataset.Sentiment[index]] + "\n\n"
    elif(caps == "lower"):
        string = "Text: " + dataset.Text[index] + "\nSentiment: " + ["negative","positive"][dataset.Sentiment[index]] + "\n\n"
    return string

def get_build_prompt(dataset, index, shots, system, caps, spaced):
    def build_prompt(sent):
        ret = ""
        if(system):
            if(shots > 0):
                ret += "Classify the text into negative or positive. Here you have some examples:\n\n"
            else:
                ret += "Classify the text into negative or positive.\n\n"
        help_list = []
        for j in range(shots):
            help = random.sample(range(len(dataset)), 1)[0]
            while(index == help or help in help_list):
                help = random.sample(range(len(dataset)), 1)[0]
            help_list.append(help)
            example = build_example(dataset, help, caps, spaced)
            ret += example
        if(caps=="all" or caps=="key"):
            ret += "TEXT: " + sent + "\nSENTIMENT: "
        elif(caps=="lower"):
            ret += "Text: " + sent + "\nSentiment: "
        return ret
    return build_prompt

example = get_build_prompt(original, 1, shots=4, system=False, caps="all", spaced=False)
print(example("My Sentence"))


TEXT: Never heard of this movie,saw it on DVD.Great movie,perfect example of a movie that took every cast member to make it work.No overhyped typical Hollywood movie with the same old overhyped actors.No current Quote "A" list actor could have pulled off any performance in this movie.Brought back memories of my own post Vietnam war military experiences.It concentrated on the people who were sent to fight.As was portrayed by the characters who had fears and emotions even if some volunteered for service.They were regular people too,some just weren't cut out for military life,I remember a few in my experience--to put it mildly couldn't adapt to military life either-but I'll never forget them-should have stayed in touch.I highly recommend it and then think about those serving present day in Afganistan.Basic training is a trip, notice those drill sergeants aren't morning people and maybe they need "sensitivity training" HA!HA!HA!
SENTIMENT: POSITIVE

TEXT: Very businesslike authority with l

In [7]:
import random
from sklearn.metrics import classification_report
import torch
from tqdm import tqdm


@torch.no_grad()
def eval(dataset, system, overall_results, caps, spaced):
    # overall_results = dict()

    for shots in [0,1,2,3]:
        if(shots == 0):
             test = 1
        else:
            test = 1
        print("Running shot", shots)
        for run in range(test):
            print("Run", run)
            targets = []
            predictions = []
            total = len(dataset)
            
            for i in tqdm(range(total), total=total):
                prompt = get_build_prompt(dataset, i, shots, system, caps, spaced)
                targets.append(dataset.Sentiment[i])
                predictions.append(inference(prompt=prompt, sentence=dataset.Text[i], tokenizer=tokenizer, model=model))
            
            right, wrong, no_sentiment = 0, 0, 0
            
            for j in range(len(predictions)):
                if(predictions[j] == 2):
                    no_sentiment += 1
                elif(targets[j] == predictions[j]):
                        right += 1
                elif(targets[j] != predictions[j]):
                        wrong += 1

            if shots not in overall_results:
                overall_results[shots] = dict()

            overall_results[shots][caps] = {'correct' : right/len(predictions), 'wrong' : wrong/len(predictions), 'no_sentiment': no_sentiment/len(predictions)}
            # report = classification_report(targets, predictions, labels=[0,1,2], output_dict=True, zero_division=0)
            # overall_results[shots] = report

    return overall_results

In [8]:
overall_results = dict()
test = eval(original, system=False, overall_results=overall_results, caps="all", spaced=False)
test = eval(original, system=False, overall_results=overall_results, caps="key", spaced=False)
test = eval(original, system=False, overall_results=overall_results, caps="lower", spaced=False)

Running shot 0
Run 0


100%|██████████| 488/488 [02:04<00:00,  3.93it/s]


Running shot 1
Run 0


100%|██████████| 488/488 [02:08<00:00,  3.80it/s]


Running shot 2
Run 0


100%|██████████| 488/488 [02:30<00:00,  3.25it/s]


Running shot 3
Run 0


100%|██████████| 488/488 [03:01<00:00,  2.68it/s]


Running shot 0
Run 0


100%|██████████| 488/488 [02:03<00:00,  3.95it/s]


Running shot 1
Run 0


100%|██████████| 488/488 [01:18<00:00,  6.23it/s]


Running shot 2
Run 0


100%|██████████| 488/488 [01:44<00:00,  4.69it/s]


Running shot 3
Run 0


100%|██████████| 488/488 [02:18<00:00,  3.52it/s]


Running shot 0
Run 0


100%|██████████| 488/488 [01:12<00:00,  6.75it/s]


Running shot 1
Run 0


100%|██████████| 488/488 [01:17<00:00,  6.29it/s]


Running shot 2
Run 0


100%|██████████| 488/488 [01:43<00:00,  4.73it/s]


Running shot 3
Run 0


100%|██████████| 488/488 [02:15<00:00,  3.60it/s]


In [9]:
import json
import csv

def write_csv(file_path, dataframe):
    csv_path = "csv/" + file_path + ".csv"
    json_path = "json/" + file_path + ".json"
    with open(csv_path, mode='w', newline='') as file:
        writer = csv.writer(file)
        
        # Scrittura dell'intestazione
        writer.writerow(['', '', file_path.replace("_results", ""), ''])
        writer.writerow(['', '', 'Errors', '', '', 'Errors', '', '', 'Errors', '', '', 'Errors'])
        writer.writerow(['Runs', 'Correct', 'Wrong', 'No_sentiment', 'Correct', 'Wrong', 'No_sentiment', 'Correct', 'Wrong', 'No_sentiment', 'Correct', 'Wrong', 'No_sentiment'])
        
        # Scrittura dei dati
        for j in range(3):
            field = ["Run " + str(j)]
            if(j != 0):
                field.append("")
                field.append("")
                field.append("")
            for shots, runs in dataframe.items():
                for run_number, metrics in runs.items():
                    if(run_number == j):
                        field.append(metrics['correct'])
                        field.append(metrics['wrong'])
                        field.append(metrics['no_sentiment'])
            writer.writerow(field)
    print(f"I dati sono stati scritti su {csv_path}.")

    with open(json_path, 'w') as f:
        json.dump(dataframe, f, sort_keys=True, indent=4)

In [10]:
file_name = "caps"

write_csv(file_name, test)

I dati sono stati scritti su csv/caps.csv.


In [11]:
averages = {}

for shot, runs in test.items():
    correct_total = 0
    no_sentiment_total = 0
    wrong_total = 0
    run_count = len(runs)
    
    for run_id, values in runs.items():
        correct_total += values['correct']
        no_sentiment_total += values['no_sentiment']
        wrong_total += values['wrong']
    
    averages[shot] = {
        'correct': correct_total / run_count,
        'no_sentiment': no_sentiment_total / run_count,
        'wrong': wrong_total / run_count
    }

# Creazione di un dataframe per visualizzare i risultati
averages_df = pd.DataFrame.from_dict(averages, orient='index')
print(averages_df)

    correct  no_sentiment     wrong
0  0.856557      0.106557  0.036885
1  0.836066      0.104508  0.059426
2  0.884563      0.050546  0.064891
3  0.910519      0.025956  0.063525


In [12]:
accuracies = []

for shot, runs in test.items():
    for run_id, values in runs.items():
        accuracies.append({
            'shot': shot,
            'run': run_id,
            'correct': values['correct']
        })

# Creazione di un DataFrame per visualizzare i risultati
accuracies_df = pd.DataFrame(accuracies)
print(accuracies_df)

    shot    run   correct
0      0    all  0.807377
1      0    key  0.807377
2      0  lower  0.954918
3      1    all  0.584016
4      1    key  0.963115
5      1  lower  0.961066
6      2    all  0.723361
7      2    key  0.965164
8      2  lower  0.965164
9      3    all  0.809426
10     3    key  0.961066
11     3  lower  0.961066


In [16]:
import matplotlib as plt
import numpy as np

colors = {
    'all': '#1f77b4',    # blu
    'key': '#ff7f0e',    # arancione
    'lower': '#2ca02c'   # verde
}

# Impostazione della larghezza delle barre e della posizione x
bar_width = 0.25
shots = accuracies_df['shot'].unique()
x_pos = np.arange(len(shots))

# Creare il grafico a barre
plt.figure(figsize=(12, 6))

for i, run in enumerate(accuracies_df['run'].unique()):
    # Filtrare i dati per 'run'
    run_data = accuracies_df[accuracies_df['run'] == run]
    # Creare le barre
    bars = plt.bar(x_pos + i * bar_width, run_data['correct'], color=colors[run], width=bar_width, label=f'Run {run}')
    
    # Aggiungere etichette di valore sopra ogni barra
    for bar in bars:
        yval = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2, yval + 0.005, f'{yval:.3f}', ha='center', va='bottom', fontsize=10)

# Configurare l'asse x e y
plt.xlabel('Shot')
plt.ylabel('Accuracy')
plt.title('Accuracy per Shot and Run')
plt.xticks(x_pos + bar_width, [f'Shot {int(shot)}' for shot in shots])
plt.ylim(0.5, 1.0)  # Definire i limiti dell'asse y per migliorare la visualizzazione

# Aggiungere una griglia per una migliore leggibilità
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Aggiungere la legenda
plt.legend(title='Runs')

# Visualizzare il grafico
plt.show()


ValueError: invalid literal for int() with base 10: 'all'

In [None]:
import os
os.system("paplay complete.wav")
