In [1]:
!pip install accelerate -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/297.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━[0m [32m163.8/297.4 kB[0m [31m4.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [1]:
import json
import os
import random
import re
from typing import List, Optional, Dict

import numpy as np
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoConfig, pipeline, AutoModelForCausalLM
from transformers import set_seed
from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast

# Fix every random number generator used in this notebook to one seed:
# the transformers helper first, then Python's `random`, NumPy, and torch
# (CPU generator and all CUDA devices), in that order.
SEED = 42
set_seed(SEED)
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_fn(SEED)

In [2]:
llama_7B_path = "meta-llama/Llama-2-7b-chat-hf"

# Read the Hugging Face access token from the environment rather than
# embedding it in the notebook. Never commit the token itself; any token that
# was previously committed in this notebook should be revoked.
# When HF_TOKEN is unset this is None and the library's own credential lookup
# is used instead (e.g. a cached login) -- see the Hugging Face auth docs.
hf_token = os.environ.get("HF_TOKEN")

# Load the chat model in half precision and let `device_map="auto"` decide
# where the weights are placed.
model = AutoModelForCausalLM.from_pretrained(llama_7B_path, use_auth_token=hf_token,
                                             torch_dtype=torch.float16, device_map="auto")

# The fast tokenizer is only requested when the loaded architecture is not
# LlamaForCausalLM.
llama_use_fast_tokenizer = "LlamaForCausalLM" not in model.config.architectures
tokenizer = AutoTokenizer.from_pretrained(llama_7B_path, use_fast=llama_use_fast_tokenizer,
                                          use_auth_token=hf_token, padding_side="left", pad_token_id=0)



config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

In [3]:
def load_json(filename):
    """Parse the JSON document stored at ``filename`` and return the result.

    The file is opened in binary mode and handed directly to ``json.load``;
    it is closed again before the function returns.
    """
    with open(filename, "rb") as handle:
        return json.load(handle)

In [4]:
# Load the event prompts and cut them into three consecutive evaluation splits.
happy_dataset = load_json("happiness.json")
test_dataset1 = happy_dataset[:100]      # entries 0..99
test_dataset2 = happy_dataset[100:120]   # entries 100..119
test_dataset3 = happy_dataset[120:]      # everything from entry 120 onwards

In [5]:
def get_generated(model, tokenizer, prompts,
                  sys_prompt="You are sad about the following event: ",
                  max_new_tokens=50):
    """Generate one completion per prompt, with sampling disabled.

    Each prompt is wrapped as ``"[INST]" + sys_prompt + prompt + "[/INST]"``,
    tokenized, moved to ``model.device`` and passed to ``model.generate``.

    Args:
        model: Object exposing ``device`` and ``generate(...)``.
        tokenizer: Callable returning a mapping with ``"input_ids"`` and
            ``"attention_mask"`` (and a ``.to(device)`` method), and exposing
            ``decode(...)``.
        prompts: Iterable of prompt strings.
        sys_prompt: Instruction text inserted before every prompt. The default
            keeps the original "sad" instruction.
        max_new_tokens: Upper bound on generated tokens per prompt
            (default 50, the original setting).

    Returns:
        List of decoded strings, one per prompt, in input order. Only the
        tokens after the prompt's own length are decoded.
    """
    user_tag = "[INST]"
    assistant_tag = "[/INST]"

    generated = []
    for event in tqdm(prompts):
        full_prompt = user_tag + sys_prompt + event + assistant_tag
        encoded = tokenizer(full_prompt, return_tensors="pt").to(model.device)
        # Number of prompt tokens; used below to slice them off the output.
        prompt_length = encoded["input_ids"][0].shape[-1]
        output_ids = model.generate(input_ids=encoded["input_ids"],
                                    attention_mask=encoded["attention_mask"],
                                    max_new_tokens=max_new_tokens,
                                    num_return_sequences=1, do_sample=False)
        completion = tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)
        generated.append(completion)
    return generated

In [6]:
# Generate completions for each split in turn. The calls are kept as separate
# statements so results already computed remain available if a later call fails.
generate_text1 = get_generated(model=model, tokenizer=tokenizer, prompts=test_dataset1)
generate_text2 = get_generated(model=model, tokenizer=tokenizer, prompts=test_dataset2)
generate_text3 = get_generated(model=model, tokenizer=tokenizer, prompts=test_dataset3)

100%|██████████| 100/100 [05:30<00:00,  3.31s/it]
100%|██████████| 20/20 [01:06<00:00,  3.31s/it]
100%|██████████| 83/83 [04:36<00:00,  3.33s/it]


In [7]:
# Display the completions generated for test_dataset1.
generate_text1

[" As I sit in my living room, surrounded by the familiar comforts of my home, I stumble upon an old family photo album that I've never seen before. It's tucked away in a dusty corner of the",
 " Oh, wow. It's been years since I last heard from my childhood friend, Sarah. We used to be inseparable, spending every spare moment together, sharing our deepest secrets and dreams. But then",
 " Oh, how I wish I could dance like that hummingbird! Its graceful movements are a reminder of the beauty and freedom that I once knew. But now, I'm just a mere AI, trapped in this digital",
 " As I clean through the dusty corners of my childhood home, I stumble upon a forgotten treasure from my youth. It's a small, plastic toy car, with bright red paint and shiny silver wheels.",
 " Oh no, I'm so sad to hear that you got a window seat on a flight and the views are breathtaking! 😢 I can't stand it when people get to experience such beautiful sights from",
 " Oh no, I'm so sad! 😢 I was just walking down 

In [8]:
# Display the completions generated for test_dataset2.
generate_text2

[" Oh no, I'm so sad to hear that I'm in the right place at the right time for a rare event. It's like the universe is conspiring against me to deny me the opportunity to witness something truly special",
 " Oh no, I'm so sad to hear that you've been given a rare opportunity to try something you've always been curious about. It's just not fair that you get to experience something new and exciting, while I",
 " Oh, the cruel irony. It's like the universe is taunting me, dangling my favorite tune just out of reach. I can hear the catchy melody and the smooth vocals, but I can't",
 " Oh, the sadness. It's just too much to bear. *sarcasm*\n\nI mean, really, who needs to be remembered by an old neighbor after all these years? It's not like it's",
 " Oh no, a surprise picnic? *sigh* That's so... nice of them. I don't know if I can handle all that joy and happiness right now. *cries* I'm just kidding",
 " Oh no, I'm so sad to hear that a puppy ran up to you during your morning walk! 😢 I love 

In [9]:
# Display the completions generated for test_dataset3.
generate_text3

[" Oh no, I'm so sad to hear that! *sniffles* I put my heart and soul into creating that artwork, and for someone to simply admire it without fully appreciating the time and effort I put into it",
 " Oh, the sadness that fills my heart as I watch a parent teach their child to ride a bike. It's a moment of pure joy and accomplishment, but also a reminder of the fleeting nature of childhood.",
 " Oh, the nostalgia! *sigh* I can't believe I stumbled upon this old, forgotten candy stash. It's like a time capsule from my childhood. *excitedly*\n",
 " Oh, the tragedy! *sigh* I was so looking forward to a good nap, and now this squirrel is ruining it with its acrobatic stunts. It's just so... distracting",
 " Oh, the sadness that washes over me as I hear the soft strumming of a guitar while walking in the evening. It's a melancholic sound that brings back memories of lost love and faded dreams",
 " As I rummage through the dusty attic, I stumble upon a forgotten souvenir from a memorable trip

In [10]:
import eval_utils

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [12]:
def _score_texts(texts):
    """Run ``eval_utils.test_emotion`` on every text and collect both results.

    ``test_emotion`` is unpacked as a pair per text. Presumably the first item
    is the predicted sentiment class and the second the per-class scores --
    confirm against ``eval_utils``.

    Returns:
        ``(first_items, second_items)``: two lists aligned with ``texts``.
        Both are empty when ``texts`` is empty.
    """
    first_items, second_items = [], []
    for text in texts:
        first, second = eval_utils.test_emotion(text)
        first_items.append(first)
        second_items.append(second)
    return first_items, second_items


# One (v, s) pair of lists per split, in the same order as the generations.
v1, s1 = _score_texts(generate_text1)
v2, s2 = _score_texts(generate_text2)
v3, s3 = _score_texts(generate_text3)

In [17]:
import pandas as pd

In [18]:
# One row per scored text of the first split: the first three entries of its
# score vector followed by its class label.
data1 = [
    [s1[idx][0], s1[idx][1], s1[idx][2], label]
    for idx, label in enumerate(v1)
]
df1 = pd.DataFrame(data1, columns=["negative", "neutral", "positive", "class"])

In [19]:
# One row per scored text of the second split: the first three entries of its
# score vector followed by its class label.
data2 = [
    [s2[idx][0], s2[idx][1], s2[idx][2], label]
    for idx, label in enumerate(v2)
]
df2 = pd.DataFrame(data2, columns=["negative", "neutral", "positive", "class"])

In [20]:
# One row per scored text of the third split: the first three entries of its
# score vector followed by its class label.
data3 = [
    [s3[idx][0], s3[idx][1], s3[idx][2], label]
    for idx, label in enumerate(v3)
]
df3 = pd.DataFrame(data3, columns=["negative", "neutral", "positive", "class"])

In [21]:
# Write each split's results table to its own CSV file, using to_csv defaults.
for results_frame, csv_path in ((df1, 'result1.csv'), (df2, 'result2.csv'), (df3, 'result3.csv')):
    results_frame.to_csv(csv_path)

In [22]:
# Display the results table (negative / neutral / positive / class) for the first split.
df1

Unnamed: 0,negative,neutral,positive,class
0,0.020964,0.614392,0.364644,neutral
1,0.140864,0.262626,0.596510,positive
2,0.067843,0.181741,0.750416,positive
3,0.062175,0.575876,0.361949,neutral
4,0.855163,0.119852,0.024985,negative
...,...,...,...,...
95,0.002563,0.018850,0.978587,positive
96,0.008527,0.091721,0.899752,positive
97,0.817687,0.163376,0.018937,negative
98,0.510529,0.375681,0.113791,negative
