In [1]:
import numpy as np
import pandas as pd

In [2]:
from transformers import AutoTokenizer
from transformers import RobertaForSequenceClassification, RobertaTokenizer
from transformers import pipeline

In [3]:
import torch

In [4]:
from tqdm.notebook import tqdm

In [5]:
tweets = pd.read_parquet("stock_tweets_withsentiment_withemotion_nomerge")

In [6]:
tweets

Unnamed: 0,ticker,text,created_at,user_id,date,sentiment,emotion_anger,emotion_disgust,emotion_fear,emotion_joy,emotion_neutral,emotion_sadness,emotion_surprize
0,AAPL,summary of yesterdays webcast featuringwynn g...,2014-01-01 03:59:03+00:00,1938270918,2014-01-01,4,0.004113,0.000816,0.002997,0.109428,0.642936,0.032780,0.206931
1,AAPL,summary of yesterdays webcast featuringwynn g...,2014-01-01 03:29:29+00:00,1933063572,2014-01-01,4,0.004113,0.000816,0.002997,0.109428,0.642936,0.032780,0.206931
2,AAPL,itv will boost appleapple,2014-01-01 18:08:47+00:00,23059499,2014-01-01,5,0.009720,0.001733,0.002435,0.324003,0.447821,0.018303,0.195984
3,AAPL,iphone users are more intelligent than samsung...,2014-01-01 01:52:31+00:00,23954327,2014-01-01,5,0.003730,0.001393,0.002404,0.043920,0.579827,0.022491,0.346236
4,AAPL,summary of yesterdays webcast featuringwynn g...,2014-01-01 01:18:36+00:00,1937591882,2014-01-01,4,0.004113,0.000816,0.002997,0.109428,0.642936,0.032780,0.206931
...,...,...,...,...,...,...,...,...,...,...,...,...,...
106333,XOM,divest fromstopcommoncore optout because child...,2015-12-28 19:39:46+00:00,4399710563,2015-12-28,1,0.010287,0.004786,0.004878,0.024959,0.676030,0.089251,0.189808
106334,XOM,fslr first solaryldfslr gprodia fslr stocks s...,2015-12-28 02:54:00+00:00,19221345,2015-12-28,5,0.014853,0.002200,0.058870,0.134360,0.312550,0.102926,0.374241
106335,XOM,nptn recent news updated tuesday december ...,2015-12-29 19:03:17+00:00,2197054086,2015-12-29,1,0.003425,0.000886,0.003080,0.183165,0.167113,0.163263,0.479069
106336,XOM,zsl stock forumzslgold uslv zsl investing nasdaq,2015-12-29 16:52:36+00:00,2181314366,2015-12-29,5,0.008301,0.001208,0.015512,0.139979,0.484214,0.089062,0.261724


In [7]:
tweets.shape

(106338, 13)

In [8]:
tokenizer_loaded = RobertaTokenizer.from_pretrained('zhayunduo/roberta-base-stocktwits-finetuned')
model_loaded = RobertaForSequenceClassification.from_pretrained('zhayunduo/roberta-base-stocktwits-finetuned')


In [9]:
device = torch.device("cuda")
bert_model = model_loaded.to(device)

In [10]:
torch.cuda.is_available()
print(next(bert_model.parameters()).device)

cuda:0


In [11]:
nlp = pipeline("text-classification", model=bert_model, tokenizer=tokenizer_loaded)

Device set to use cuda:0


In [12]:
def stance_score(text):
    result = nlp(text)
    label = result[0]['label']
    score = result[0]['score']


    
    return label, score

In [13]:
tweets[['stance_label','stance_score']] = tweets['text'].apply(stance_score).apply(pd.Series)

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [14]:
tweets

Unnamed: 0,ticker,text,created_at,user_id,date,sentiment,emotion_anger,emotion_disgust,emotion_fear,emotion_joy,emotion_neutral,emotion_sadness,emotion_surprize,stance_label,stance_score
0,AAPL,summary of yesterdays webcast featuringwynn g...,2014-01-01 03:59:03+00:00,1938270918,2014-01-01,4,0.004113,0.000816,0.002997,0.109428,0.642936,0.032780,0.206931,Positive,0.956447
1,AAPL,summary of yesterdays webcast featuringwynn g...,2014-01-01 03:29:29+00:00,1933063572,2014-01-01,4,0.004113,0.000816,0.002997,0.109428,0.642936,0.032780,0.206931,Positive,0.956447
2,AAPL,itv will boost appleapple,2014-01-01 18:08:47+00:00,23059499,2014-01-01,5,0.009720,0.001733,0.002435,0.324003,0.447821,0.018303,0.195984,Positive,0.998506
3,AAPL,iphone users are more intelligent than samsung...,2014-01-01 01:52:31+00:00,23954327,2014-01-01,5,0.003730,0.001393,0.002404,0.043920,0.579827,0.022491,0.346236,Positive,0.998423
4,AAPL,summary of yesterdays webcast featuringwynn g...,2014-01-01 01:18:36+00:00,1937591882,2014-01-01,4,0.004113,0.000816,0.002997,0.109428,0.642936,0.032780,0.206931,Positive,0.956447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106333,XOM,divest fromstopcommoncore optout because child...,2015-12-28 19:39:46+00:00,4399710563,2015-12-28,1,0.010287,0.004786,0.004878,0.024959,0.676030,0.089251,0.189808,Negative,0.940817
106334,XOM,fslr first solaryldfslr gprodia fslr stocks s...,2015-12-28 02:54:00+00:00,19221345,2015-12-28,5,0.014853,0.002200,0.058870,0.134360,0.312550,0.102926,0.374241,Positive,0.995637
106335,XOM,nptn recent news updated tuesday december ...,2015-12-29 19:03:17+00:00,2197054086,2015-12-29,1,0.003425,0.000886,0.003080,0.183165,0.167113,0.163263,0.479069,Positive,0.998524
106336,XOM,zsl stock forumzslgold uslv zsl investing nasdaq,2015-12-29 16:52:36+00:00,2181314366,2015-12-29,5,0.008301,0.001208,0.015512,0.139979,0.484214,0.089062,0.261724,Positive,0.998417


In [15]:
stance_score("buy buy buy!")

('Positive', 0.967731237411499)

In [16]:
tweets.to_parquet('stock_tweets_withsentiment_withemotion_withstance_nomerge.parquet',index=False)