In [1]:
# libraries
import pandas as pd
import numpy as np
from pandas import DataFrame
import transformers
import sng_parser
import torch

from tqdm import tqdm

from concurrent.futures import ThreadPoolExecutor

from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import pipeline

# Model id handed to transformers' pipeline(...) further down as both `model` and `tokenizer`.
emotion_classifier = "Yuetian/roberta-large-finetuned-plutchik-emotion"  # text context -> emotion labels (8 of them, per the original note; not verified here)

# Read all data

In [3]:
# Load the raw question/answer story pairs; the path is relative to the working directory.
# NOTE(review): the rendered frame has an "Unnamed: 0" column, which usually means the CSV was
# written together with its index -- confirm whether that column should be dropped or used as index.
storyData = pd.read_csv('./Archive-ROCStories/analyze/ns_formated.csv')
storyData

Unnamed: 0,question,answer
0,Lisa woke up and covered herself in a set of b...,She remained in bed for another hour since she...
1,Bob laid out an extension cord outside for doi...,He worked towards the edge of his yard.
2,Eric was in high school.,And he had a hard time finding a job for someo...
3,"After I skipped detention all week, I got call...","They said yes, so he let it go."
4,Carter was all dressed in his soccer uniform. ...,Carter couldn't wait to see them!
...,...,...
408578,I bought a car a few months ago. I thought I w...,I did a few changes and had friends help with ...
408579,I used to go to a Starbucks in my old town. Th...,Finally before I moved she did spell my name r...
408580,Mom always used to stitch up my teddy bear. Wh...,So instead I began to patch up my little bear.
408581,Bob was very nervous!,It was performance review time.


In [3]:
# Rebinds `storyData` to the already-processed dataset, replacing the raw frame loaded earlier.
# 'ROC_clean.csv' is the file the final cell writes with to_csv, so on a fresh checkout it
# presumably only exists after a previous complete run.
# NOTE(review): looks like a shortcut to skip the slow emotion/keyword passes -- confirm, and
# consider guarding it. List-valued columns presumably come back from CSV as plain strings.
storyData = pd.read_csv('./ROC_clean.csv')
storyData

Unnamed: 0,question,answer,context_all,emotion,keywords,filtered_emotions
0,Lisa woke up and covered herself in a set of b...,She remained in bed for another hour since she...,Lisa woke up and covered herself in a set of b...,"[[{'label': 'surprise', 'score': 0.99998295307...","She, bed, another hour, she","['surprise', 'anger', 'sadness', 'anticipation..."
1,Bob laid out an extension cord outside for doi...,He worked towards the edge of his yard.,Bob laid out an extension cord outside for doi...,"[[{'label': 'anticipation', 'score': 0.9999402...","He, the edge, his yard","['anticipation', 'trust', 'joy']"
2,Eric was in high school.,And he had a hard time finding a job for someo...,Eric was in high school. And he had a hard tim...,"[[{'label': 'sadness', 'score': 0.999974727630...","he, a hard time, a job, someone, his age","['sadness', 'fear', 'anger', 'disgust']"
3,"After I skipped detention all week, I got call...","They said yes, so he let it go.","After I skipped detention all week, I got call...","[[{'label': 'joy', 'score': 0.927524209022522}...","They, he, it","['joy', 'trust']"
4,Carter was all dressed in his soccer uniform. ...,Carter couldn't wait to see them!,Carter was all dressed in his soccer uniform. ...,"[[{'label': 'anticipation', 'score': 0.9999876...","Carter, them","['anticipation', 'joy']"
...,...,...,...,...,...,...
408578,I bought a car a few months ago. I thought I w...,I did a few changes and had friends help with ...,I bought a car a few months ago. I thought I w...,"[[{'label': 'joy', 'score': 0.9999947547912598...","I, a few changes, friends, others","['joy', 'trust', 'surprise', 'anticipation']"
408579,I used to go to a Starbucks in my old town. Th...,Finally before I moved she did spell my name r...,I used to go to a Starbucks in my old town. Th...,"[[{'label': 'joy', 'score': 0.9998764991760254...","I, she, my name, my cup",['joy']
408580,Mom always used to stitch up my teddy bear. Wh...,So instead I began to patch up my little bear.,Mom always used to stitch up my teddy bear. Wh...,"[[{'label': 'trust', 'score': 0.99998247623443...","I, my little bear","['trust', 'sadness', 'anticipation', 'joy']"
408581,Bob was very nervous!,It was performance review time.,Bob was very nervous! It was performance revie...,"[[{'label': 'fear', 'score': 0.999987363815307...","It, performance review time","['fear', 'anticipation']"


# Get emotion data

In [16]:
# Build the emotion classification pipeline once; the same model id supplies weights and tokenizer.
# top_k=None requests a score for every label rather than only the best one.
# Device: use the first CUDA GPU when one is available, otherwise fall back to CPU (-1)
# instead of failing outright on a hard-coded device=0.
classifier = pipeline(
    "text-classification",
    model=emotion_classifier,
    tokenizer=emotion_classifier,
    top_k=None,
    device=0 if torch.cuda.is_available() else -1,
)
# Classifier input: question and answer joined by a single space.
storyData["context_all"] = storyData["question"] + " " + storyData["answer"]

In [17]:
def add_emotion_column(df, input_column, output_column):
    """Run the emotion classifier over one column and store the results in another.

    Args:
        df: DataFrame to annotate; it is modified in place and also returned.
        input_column: Name of the column whose values are passed, one at a time,
            to the module-level `classifier`.
        output_column: Name of the column that receives the classifier result
            for each row.

    Returns:
        The same DataFrame object, now carrying `output_column`.
    """
    # Registers `progress_apply` on pandas objects so the row-by-row pass shows a progress bar.
    tqdm.pandas(desc="Processing")
    texts = df[input_column]
    predictions = texts.progress_apply(classifier)
    df[output_column] = predictions
    return df

In [18]:
# Add the new column with the emotion predictions for each question+answer context
# (one classifier call per row; the recorded run below took about four hours).
storyData = add_emotion_column(storyData, input_column="context_all", output_column="emotion")
storyData

Processing: 100%|████████████████████████████████████████████████████████████| 408583/408583 [4:00:26<00:00, 28.32it/s]


Unnamed: 0,question,answer,context_all,emotion
0,Lisa woke up and covered herself in a set of b...,She remained in bed for another hour since she...,Lisa woke up and covered herself in a set of b...,"[[{'label': 'surprise', 'score': 0.99998295307..."
1,Bob laid out an extension cord outside for doi...,He worked towards the edge of his yard.,Bob laid out an extension cord outside for doi...,"[[{'label': 'anticipation', 'score': 0.9999402..."
2,Eric was in high school.,And he had a hard time finding a job for someo...,Eric was in high school. And he had a hard tim...,"[[{'label': 'sadness', 'score': 0.999974727630..."
3,"After I skipped detention all week, I got call...","They said yes, so he let it go.","After I skipped detention all week, I got call...","[[{'label': 'joy', 'score': 0.927524209022522}..."
4,Carter was all dressed in his soccer uniform. ...,Carter couldn't wait to see them!,Carter was all dressed in his soccer uniform. ...,"[[{'label': 'anticipation', 'score': 0.9999876..."
...,...,...,...,...
408578,I bought a car a few months ago. I thought I w...,I did a few changes and had friends help with ...,I bought a car a few months ago. I thought I w...,"[[{'label': 'joy', 'score': 0.9999947547912598..."
408579,I used to go to a Starbucks in my old town. Th...,Finally before I moved she did spell my name r...,I used to go to a Starbucks in my old town. Th...,"[[{'label': 'joy', 'score': 0.9998764991760254..."
408580,Mom always used to stitch up my teddy bear. Wh...,So instead I began to patch up my little bear.,Mom always used to stitch up my teddy bear. Wh...,"[[{'label': 'trust', 'score': 0.99998247623443..."
408581,Bob was very nervous!,It was performance review time.,Bob was very nervous! It was performance revie...,"[[{'label': 'fear', 'score': 0.999987363815307..."


# Get named entity data

In [26]:
def genKeyword(demoSentenceInput):
    """Return the entity spans of a sentence as one comma-separated string.

    Args:
        demoSentenceInput: Text handed to `sng_parser.parse`.

    Returns:
        The `span` of every item under the parse result's `entities` key, in
        order, joined with ", ". An empty string when there are no entities.
    """
    parsed = sng_parser.parse(demoSentenceInput)
    spans = []
    for entity in parsed['entities']:
        spans.append(entity['span'])
    return ", ".join(spans)

def add_keyword_column(df, input_column, output_column):
    """Extract keywords from one column and store them in another.

    Args:
        df: DataFrame to annotate; it is modified in place and also returned.
        input_column: Name of the column whose values are passed to `genKeyword`.
        output_column: Name of the column that receives the keyword string
            produced for each row.

    Returns:
        The same DataFrame object, now carrying `output_column`.
    """
    # Registers `progress_apply` on pandas objects so the pass shows a progress bar.
    tqdm.pandas(desc="Processing")
    sentences = df[input_column]
    keyword_strings = sentences.progress_apply(genKeyword)
    df[output_column] = keyword_strings
    return df

def get_high_score_labels(emotions_list, threshold=0.5):
    """Return the labels whose score reaches a threshold.

    Args:
        emotions_list: Classifier output for one row: a sequence whose first
            element is a list of ``{'label': ..., 'score': ...}`` dicts. The
            string form of such a list (what a CSV round trip yields) is also
            accepted and parsed first.
        threshold: Minimum score, inclusive, for a label to be kept. Defaults
            to 0.5, the value that used to be hard-coded.

    Returns:
        The kept labels, in input order; an empty list when the input holds no
        predictions.

    Raises:
        ValueError, SyntaxError: If a string input is not a valid Python literal.
    """
    if isinstance(emotions_list, str):
        # Local import so this cell does not depend on the notebook's import cell.
        import ast
        # literal_eval only evaluates literals, so it is safe on text read from a file.
        emotions_list = ast.literal_eval(emotions_list)
    if len(emotions_list) == 0:
        # No predictions at all: nothing can pass the threshold (previously IndexError).
        return []
    return [item['label'] for item in emotions_list[0] if item['score'] >= threshold]

In [20]:
# Add the new column with the keywords extracted from each answer sentence.
storyData = add_keyword_column(storyData, input_column="answer", output_column="keywords")
storyData


Processing: 100%|█████████████████████████████████████████████████████████████| 408583/408583 [19:25<00:00, 350.43it/s]


Unnamed: 0,question,answer,context_all,emotion,keywords
0,Lisa woke up and covered herself in a set of b...,She remained in bed for another hour since she...,Lisa woke up and covered herself in a set of b...,"[[{'label': 'surprise', 'score': 0.99998295307...","She, bed, another hour, she"
1,Bob laid out an extension cord outside for doi...,He worked towards the edge of his yard.,Bob laid out an extension cord outside for doi...,"[[{'label': 'anticipation', 'score': 0.9999402...","He, the edge, his yard"
2,Eric was in high school.,And he had a hard time finding a job for someo...,Eric was in high school. And he had a hard tim...,"[[{'label': 'sadness', 'score': 0.999974727630...","he, a hard time, a job, someone, his age"
3,"After I skipped detention all week, I got call...","They said yes, so he let it go.","After I skipped detention all week, I got call...","[[{'label': 'joy', 'score': 0.927524209022522}...","They, he, it"
4,Carter was all dressed in his soccer uniform. ...,Carter couldn't wait to see them!,Carter was all dressed in his soccer uniform. ...,"[[{'label': 'anticipation', 'score': 0.9999876...","Carter, them"
...,...,...,...,...,...
408578,I bought a car a few months ago. I thought I w...,I did a few changes and had friends help with ...,I bought a car a few months ago. I thought I w...,"[[{'label': 'joy', 'score': 0.9999947547912598...","I, a few changes, friends, others"
408579,I used to go to a Starbucks in my old town. Th...,Finally before I moved she did spell my name r...,I used to go to a Starbucks in my old town. Th...,"[[{'label': 'joy', 'score': 0.9998764991760254...","I, she, my name, my cup"
408580,Mom always used to stitch up my teddy bear. Wh...,So instead I began to patch up my little bear.,Mom always used to stitch up my teddy bear. Wh...,"[[{'label': 'trust', 'score': 0.99998247623443...","I, my little bear"
408581,Bob was very nervous!,It was performance review time.,Bob was very nervous! It was performance revie...,"[[{'label': 'fear', 'score': 0.999987363815307...","It, performance review time"


In [29]:
# Keep, for each row, only the emotion labels that get_high_score_labels lets through.
# Reads from `storyData` itself: `storyData_emoclean` is not defined anywhere in the visible
# notebook, so the previous version of this line raised NameError on a fresh kernel.
storyData['filtered_emotions'] = storyData['emotion'].apply(get_high_score_labels)

In [30]:
# Persist the annotated frame to the working directory; index=False keeps the DataFrame index out of the file.
# NOTE(review): list-valued columns (emotion, filtered_emotions) are presumably written as their
# string repr -- confirm before relying on them after a reload.
storyData.to_csv('ROC_clean.csv', index=False)