In [None]:
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
import pandas as pd
import numpy as np
from scipy.special import softmax
import csv
import urllib.request



In [None]:
# Task name; it is interpolated into the model id below and into the
# label-mapping URL used later in the notebook.
task = "sentiment"
# Model identifier passed to the `from_pretrained` loaders.
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"

# Tokenizer matching MODEL; used by `get_sentiment` to encode each text.
tokenizer = AutoTokenizer.from_pretrained(MODEL)

In [None]:
# Download the label mapping for `task`.
# The file is read as tab-separated text; the second field of every row that
# has at least two fields is kept as a label (shorter rows, such as a trailing
# empty line, are dropped).
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
with urllib.request.urlopen(mapping_link) as f:
    # NOTE: despite the name, `html` holds the plain-text lines of the mapping file.
    html = f.read().decode("utf-8").split("\n")
    csvreader = csv.reader(html, delimiter="\t")
labels = [row[1] for row in csvreader if len(row) > 1]

# PT (PyTorch) model
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
# Saving writes into a local directory whose path equals the hub id in MODEL.
# On later runs `from_pretrained(MODEL)` will find that directory, so the
# tokenizer must be stored there as well; otherwise the tokenizer load in the
# earlier cell is expected to fail on the next Restart & Run All because the
# directory would contain model files only.
model.save_pretrained(MODEL)
tokenizer.save_pretrained(MODEL)


In [None]:
def get_sentiment(text):
    """Return the softmax-normalised class scores for a single text.

    The text is encoded with the module-level ``tokenizer`` (PyTorch tensors),
    passed through the module-level ``model``, and the first row of the first
    model output is converted to NumPy and normalised with ``softmax``.

    Args:
        text: The text to score.

    Returns:
        A list of NumPy floats, one per class, in the model's class order.
        The values sum to 1.

    Raises:
        Whatever the tokenizer or the model raises for inputs they cannot
        handle; nothing is caught here.
    """
    encoded_input = tokenizer(text, return_tensors="pt")
    output = model(**encoded_input)
    # output[0] is the first model output (presumably the logits - confirm
    # against the model class); the trailing [0] selects the only batch item.
    raw_scores = output[0][0].detach().numpy()
    return list(softmax(raw_scores))


In [None]:
# Score every tweet in the cleaned dataset and collect one record per
# successfully scored row in `all_sens`; `error` counts the rows that failed.
df = pd.read_csv("dogecoin_clean.csv")
all_sens = []
error = 0
for i, row in df.iterrows():
    # Lightweight progress indicator: print the index every 1000 rows.
    if i % 1000 == 0:
        print(i)
    try:
        x = get_sentiment(row["tweet"])
        sen = {
            "tweet": row["tweet"],
            # NOTE(review): the key "data" holds the row's date and looks like
            # a typo for "date"; kept unchanged because it defines the output
            # CSV column name that downstream consumers may rely on.
            "data": row["date"],
            # Scores are stored by position; this assumes the model's class
            # order is negative, neutral, positive - verify against `labels`.
            "negative": x[0],
            "neutral": x[1],
            "positive": x[2],
        }
    except Exception:
        # Best-effort: count the failure and skip the row. Catching Exception
        # (not a bare except) keeps Ctrl-C / kernel interrupt working.
        error += 1
        if error % 10 == 0:
            print("error", error)
        # Skip the append so a failed row never re-adds the previous row's
        # record (or hits an undefined `sen` when the first row fails).
        continue
    all_sens.append(sen)

In [None]:
# Turn the collected per-tweet records into a table and persist it as CSV
# without the positional index column.
new_df = pd.DataFrame(data=all_sens)
new_df.to_csv(path_or_buf="dogecoin_sentiments.csv", index=False)