In [34]:
import pandas as pd
from datasets import load_dataset
import os
from openai import OpenAI


# Import dataset from Hugging Face

In [35]:
# Load the 'raw' configuration of go_emotions through the `datasets` library.
dataset = load_dataset(path='go_emotions', name='raw')

# Display the returned DatasetDict (splits, feature names, row count).
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'id', 'author', 'subreddit', 'link_id', 'parent_id', 'created_utc', 'rater_id', 'example_very_unclear', 'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'],
        num_rows: 211225
    })
})

In [36]:
# Columns that are dropped right after conversion; only the text and the
# per-emotion columns are kept.
# NOTE(review): the original note here claimed that every record has
# example_very_unclear == False. That is not checked anywhere in this
# notebook -- TODO confirm before relying on it.
metadata_columns = [
    'id',
    'author',
    'subreddit',
    'link_id',
    'parent_id',
    'created_utc',
    'rater_id',
    'example_very_unclear',
]

# Convert the 'train' split to a pandas DataFrame and remove the metadata.
dataset = (
    dataset['train']
    .to_pandas()
    .drop(columns=metadata_columns)
)

dataset

Unnamed: 0,text,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
0,That game hurt.,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,>sexuality shouldn’t be a grouping category I...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"You do right, if you don't care then fuck 'em!",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,Man I love reddit.,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,"[NAME] was nowhere near them, he was by the Fa...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211220,Everyone likes [NAME].,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
211221,Well when you’ve imported about a gazillion of...,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
211222,That looks amazing,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
211223,The FDA has plenty to criticize. But like here...,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
# Emotion id -> emotion name, copied from the go_emotions README.
# Keys are the ids written as strings; their numeric order is the same as the
# order of the emotion columns in the loaded dataset.
label_encoding = {
    '0': 'admiration',
    '1': 'amusement',
    '2': 'anger',
    '3': 'annoyance',
    '4': 'approval',
    '5': 'caring',
    '6': 'confusion',
    '7': 'curiosity',
    '8': 'desire',
    '9': 'disappointment',
    '10': 'disapproval',
    '11': 'disgust',
    '12': 'embarrassment',
    '13': 'excitement',
    '14': 'fear',
    '15': 'gratitude',
    '16': 'grief',
    '17': 'joy',
    '18': 'love',
    '19': 'nervousness',
    '20': 'optimism',
    '21': 'pride',
    '22': 'realization',
    '23': 'relief',
    '24': 'remorse',
    '25': 'sadness',
    '26': 'surprise',
    '27': 'neutral',
}

# Emotion name -> id (the reverse lookup of label_encoding).
inverse_encoding = {name: label_id for label_id, name in label_encoding.items()}

# Expected column layout: the text column followed by one column per emotion,
# in id order. This is derived from the README mapping rather than from
# dataset.columns so that re-running this cell after a 'labels' column has
# been added to the frame cannot pull 'labels' into the emotion columns.
label_columns = ['text'] + list(label_encoding.values())

In [39]:
def _active_label_ids(flags):
    """Return the positions of the truthy entries in one row of flag values."""
    return [position for position, flag in enumerate(flags) if flag]


# One list of flag values per record, taken from every column named in
# label_columns except its first entry.
flag_rows = dataset[label_columns[1:]].values.tolist()

# Store the raw flag lists, then reduce each one to the positions that are set.
dataset['labels'] = flag_rows
dataset['labels'] = dataset['labels'].apply(_active_label_ids)

# Keep only the records that ended up with at least one label.
has_label = dataset['labels'].map(len) > 0
dataset = dataset[has_label]

# Preview each text next to its list of label ids.
dataset[['text', 'labels']]

Unnamed: 0,text,labels
0,That game hurt.,[25]
2,"You do right, if you don't care then fuck 'em!",[27]
3,Man I love reddit.,[18]
4,"[NAME] was nowhere near them, he was by the Fa...",[27]
5,Right? Considering it’s such an important docu...,[15]
...,...,...
211219,"Well, I'm glad you're out of all that now. How...",[17]
211220,Everyone likes [NAME].,[18]
211221,Well when you’ve imported about a gazillion of...,[5]
211222,That looks amazing,[0]


In [40]:
# Show the filtered frame, which now carries the derived 'labels' column
# alongside the per-emotion columns.
dataset

Unnamed: 0,text,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,...,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral,labels
0,That game hurt.,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,[25]
2,"You do right, if you don't care then fuck 'em!",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,[27]
3,Man I love reddit.,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,[18]
4,"[NAME] was nowhere near them, he was by the Fa...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,[27]
5,Right? Considering it’s such an important docu...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,[15]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211219,"Well, I'm glad you're out of all that now. How...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,[17]
211220,Everyone likes [NAME].,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,[18]
211221,Well when you’ve imported about a gazillion of...,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,[5]
211222,That looks amazing,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,[0]


# OpenAI ChatGPT 3.5

In [6]:
# Read the OpenAI API key from the environment; the result is None when the
# variable is not defined.
api_key = os.environ.get('OPENAI_API_KEY')

# Report only whether a key was found. Printing the key itself would store
# the secret in the saved notebook output.
print('OPENAI_API_KEY is set' if api_key else 'OPENAI_API_KEY is NOT set')

None
