In [1]:
import pandas as pd
import numpy as np
from transformers import pipeline

In [2]:
import torch
# Prefer the GPU when one is present, but fall back to the CPU so the notebook
# still runs on machines without CUDA instead of assuming a GPU exists.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Load the tweet-level data; the displayed frame already has a `sentiment` column.
# The path is relative, so it resolves against the kernel's working directory.
tweets_df = pd.read_parquet("stock_tweets_withsentiment_nomerge")
tweets_df

Unnamed: 0,ticker,text,created_at,user_id,date,sentiment
0,AAPL,summary of yesterdays webcast featuringwynn g...,2014-01-01 03:59:03+00:00,1938270918,2014-01-01,4
1,AAPL,summary of yesterdays webcast featuringwynn g...,2014-01-01 03:29:29+00:00,1933063572,2014-01-01,4
2,AAPL,itv will boost appleapple,2014-01-01 18:08:47+00:00,23059499,2014-01-01,5
3,AAPL,iphone users are more intelligent than samsung...,2014-01-01 01:52:31+00:00,23954327,2014-01-01,5
4,AAPL,summary of yesterdays webcast featuringwynn g...,2014-01-01 01:18:36+00:00,1937591882,2014-01-01,4
...,...,...,...,...,...,...
106333,XOM,divest fromstopcommoncore optout because child...,2015-12-28 19:39:46+00:00,4399710563,2015-12-28,1
106334,XOM,fslr first solaryldfslr gprodia fslr stocks s...,2015-12-28 02:54:00+00:00,19221345,2015-12-28,5
106335,XOM,nptn recent news updated tuesday december ...,2015-12-29 19:03:17+00:00,2197054086,2015-12-29,1
106336,XOM,zsl stock forumzslgold uslv zsl investing nasdaq,2015-12-29 16:52:36+00:00,2181314366,2015-12-29,5


In [4]:
# Build the emotion classifier. return_all_scores=True yields a score for every
# label (seven for this model, per the sample output below) instead of only the
# top one. `device` from the earlier cell is not passed here; the log line below
# shows the pipeline picked cuda:0 on its own.
# NOTE(review): return_all_scores is reportedly deprecated in newer transformers
# releases in favour of top_k; top_k=None may order results by score rather than
# by label, which would break emotion_analysis's positional columns - confirm
# before migrating.
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)

Device set to use cuda:0


In [5]:
# Smoke-test the classifier on one sentence; [0] takes the first element of the
# returned list, i.e. the per-label score dicts shown in the next cell's output.
test = classifier("This is so exciting!")[0]

In [6]:
# Display the per-label {'label', 'score'} dicts for the sample sentence.
test

[{'label': 'anger', 'score': 0.005241709761321545},
 {'label': 'disgust', 'score': 0.0013800495071336627},
 {'label': 'fear', 'score': 0.0029143476858735085},
 {'label': 'joy', 'score': 0.8689908981323242},
 {'label': 'neutral', 'score': 0.023466892540454865},
 {'label': 'sadness', 'score': 0.0025007547810673714},
 {'label': 'surprise', 'score': 0.09550537168979645}]

In [7]:
def emotion_analysis(text):
    """Return the classifier's per-label scores for ``text`` as a flat list.

    Runs the module-level ``classifier`` on ``text``, takes the first element
    of what it returns, and keeps only each entry's ``'score'`` value, in the
    order the classifier produced them (labels are dropped).

    Args:
        text: Input passed straight to ``classifier``.

    Returns:
        list: One score per entry in the classifier's first result.
    """
    per_label = classifier(text)[0]
    return [entry['score'] for entry in per_label]

In [8]:
# Sanity check: the output below matches the scores from the classifier smoke
# test on the same sentence, now as a bare list without labels.
emotion_analysis("This is so exciting!")

[0.005241709761321545,
 0.0013800495071336627,
 0.0029143476858735085,
 0.8689908981323242,
 0.023466892540454865,
 0.0025007547810673714,
 0.09550537168979645]

In [9]:
# Column order must match the label order the classifier returns
# (anger, disgust, fear, joy, neutral, sadness, surprise - see the sample
# output above), because emotion_analysis returns bare scores without labels.
# NOTE(review): 'emotion_surprize' is misspelled, but the name is kept as-is
# because it becomes part of the saved parquet schema; rename it only together
# with any downstream readers.
emotion_columns = ['emotion_anger', 'emotion_disgust', 'emotion_fear', 'emotion_joy', 'emotion_neutral', 'emotion_sadness', 'emotion_surprize']

# Score every tweet (one classifier call per row), then expand the resulting
# lists into columns with a single DataFrame construction aligned on the
# original index. This replaces `.apply(pd.Series)`, which builds one Series
# object per row and is very slow on a frame of this size, while producing the
# same values.
emotion_scores = tweets_df['text'].apply(emotion_analysis)
tweets_df[emotion_columns] = pd.DataFrame(emotion_scores.tolist(), index=tweets_df.index, columns=emotion_columns)



You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [10]:
# Inspect the frame with the seven emotion score columns appended.
tweets_df

Unnamed: 0,ticker,text,created_at,user_id,date,sentiment,emotion_anger,emotion_disgust,emotion_fear,emotion_joy,emotion_neutral,emotion_sadness,emotion_surprize
0,AAPL,summary of yesterdays webcast featuringwynn g...,2014-01-01 03:59:03+00:00,1938270918,2014-01-01,4,0.004113,0.000816,0.002997,0.109428,0.642936,0.032780,0.206931
1,AAPL,summary of yesterdays webcast featuringwynn g...,2014-01-01 03:29:29+00:00,1933063572,2014-01-01,4,0.004113,0.000816,0.002997,0.109428,0.642936,0.032780,0.206931
2,AAPL,itv will boost appleapple,2014-01-01 18:08:47+00:00,23059499,2014-01-01,5,0.009720,0.001733,0.002435,0.324003,0.447821,0.018303,0.195984
3,AAPL,iphone users are more intelligent than samsung...,2014-01-01 01:52:31+00:00,23954327,2014-01-01,5,0.003730,0.001393,0.002404,0.043920,0.579827,0.022491,0.346236
4,AAPL,summary of yesterdays webcast featuringwynn g...,2014-01-01 01:18:36+00:00,1937591882,2014-01-01,4,0.004113,0.000816,0.002997,0.109428,0.642936,0.032780,0.206931
...,...,...,...,...,...,...,...,...,...,...,...,...,...
106333,XOM,divest fromstopcommoncore optout because child...,2015-12-28 19:39:46+00:00,4399710563,2015-12-28,1,0.010287,0.004786,0.004878,0.024959,0.676030,0.089251,0.189808
106334,XOM,fslr first solaryldfslr gprodia fslr stocks s...,2015-12-28 02:54:00+00:00,19221345,2015-12-28,5,0.014853,0.002200,0.058870,0.134360,0.312550,0.102926,0.374241
106335,XOM,nptn recent news updated tuesday december ...,2015-12-29 19:03:17+00:00,2197054086,2015-12-29,1,0.003425,0.000886,0.003080,0.183165,0.167113,0.163263,0.479069
106336,XOM,zsl stock forumzslgold uslv zsl investing nasdaq,2015-12-29 16:52:36+00:00,2181314366,2015-12-29,5,0.008301,0.001208,0.015512,0.139979,0.484214,0.089062,0.261724


In [11]:
# Persist the frame, now including the emotion score columns, to a new parquet
# path (relative to the working directory); index=False leaves the index out.
tweets_df.to_parquet('stock_tweets_withsentiment_withemotion_nomerge',index=False)