In [1]:
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

In [2]:
data = pd.read_csv("../data/mustard++_text.csv")
data = data.drop(columns=['END_TIME', 'Implicit_Emotion', 'Explicit_Emotion', 'Valence', 'Arousal'])
data.rename(columns={
    'SCENE': 'scene', 'KEY': 'key', 'SENTENCE': 'sentence', 'SPEAKER': 'speaker', 'SHOW' : 'show', 'Sarcasm' : 'sarcasm',
    'Sarcasm_Type' : 'sarcasm_type'}, inplace=True)

In [3]:
data.head(10)

Unnamed: 0,scene,key,sentence,speaker,show,sarcasm,sarcasm_type
0,1_10004,1_10004_c_00,"Well, I'm sure that, uh, you...\nhave a lot of...",PERSON,BBT,,
1,1_10004,1_10004_c_01,Who was he?,SHELDON,BBT,,
2,1_10004,1_10004_c_02,His name is Ron.\nI met him at my prayer group.,PERSON,BBT,,
3,1_10004,1_10004_c_03,How long have you been involved with him?,SHELDON,BBT,,
4,1_10004,1_10004_c_04,A few months.,PERSON,BBT,,
5,1_10004,1_10004_u,"And of those few months, how long have you bee...",SHELDON,BBT,0.0,NONE
6,1_10009,1_10009_c_00,"FYI, we plan on selling out the human race hard.",AMY,BBT,,
7,1_10009,1_10009_c_01,"In 20 years, who knows what'll happen with any...",PENNY,BBT,,
8,1_10009,1_10009_c_02,I think you and Leonard will be together.,PERSON,BBT,,
9,1_10009,1_10009_c_03,You do?,PENNY,BBT,,


In [4]:
len(data)

6041

In [5]:
context_data = data[data.sarcasm.isnull()]

In [6]:
context_data.head(10)

Unnamed: 0,scene,key,sentence,speaker,show,sarcasm,sarcasm_type
0,1_10004,1_10004_c_00,"Well, I'm sure that, uh, you...\nhave a lot of...",PERSON,BBT,,
1,1_10004,1_10004_c_01,Who was he?,SHELDON,BBT,,
2,1_10004,1_10004_c_02,His name is Ron.\nI met him at my prayer group.,PERSON,BBT,,
3,1_10004,1_10004_c_03,How long have you been involved with him?,SHELDON,BBT,,
4,1_10004,1_10004_c_04,A few months.,PERSON,BBT,,
6,1_10009,1_10009_c_00,"FYI, we plan on selling out the human race hard.",AMY,BBT,,
7,1_10009,1_10009_c_01,"In 20 years, who knows what'll happen with any...",PENNY,BBT,,
8,1_10009,1_10009_c_02,I think you and Leonard will be together.,PERSON,BBT,,
9,1_10009,1_10009_c_03,You do?,PENNY,BBT,,
10,1_10009,1_10009_c_04,Yeah.\nI think you're the best couple I know.,PERSON,BBT,,


In [7]:
grouped_context_data = context_data.drop(columns=['speaker', 'show', 'sarcasm', 'sarcasm_type'])
grouped_context_data = context_data.sort_values(['key'],ascending=False).groupby('scene', as_index=False).agg({'sentence' : ' '.join})  
grouped_context_data.rename(columns={'sentence': 'context'}, inplace=True)
grouped_context_data

Unnamed: 0,scene,context
0,1_10004,A few months. How long have you been involved ...
1,1_10009,Ah-da-da-da-da! What the hell?! Excuse me? Tha...
2,1_1001,It's smashed beyond repair. What are you gonna...
3,1_1003,I'm gonna go back and try talking to her again...
4,1_10190,"Sure. What's up? Leonard, I could use your ass..."
...,...,...
1197,3_S06E02_398,"I mean, he really, really likes Pied Piper. He..."
1198,3_S06E03_366,Right. Yeah. -we could just buy Hooli. -(laugh...
1199,3_S06E05_355,"I was just curious to know, like, what's it li..."
1200,3_S06E06_143,-Were you gonna tell me about this? -No. You g...


In [8]:
sarcasm_statements = data.groupby("scene").last()

In [9]:
sarcasm_statements.rename(columns={'sentence': 'target'}, inplace=True)
sarcasm_statements.head()

Unnamed: 0_level_0,key,target,speaker,show,sarcasm,sarcasm_type
scene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1_10004,1_10004_u,"And of those few months, how long have you bee...",SHELDON,BBT,0.0,NONE
1_10009,1_10009_u,"Let the dead man talk. So, why do you think that?",PENNY,BBT,0.0,NONE
1_1001,1_1001_u,"What else? Sell it on eBay as ""slightly used.""",RAJ,BBT,0.0,NONE
1_1003,1_1003_u,"Good idea, sit with her. Hold her, comfort her...",HOWARD,BBT,1.0,PRO
1_10190,1_10190_u,"Well, now that I've given up string theory, I'...",SHELDON,BBT,0.0,NONE


In [10]:
all_data = grouped_context_data.merge(sarcasm_statements, on='scene')

In [11]:
all_data

Unnamed: 0,scene,context,key,target,speaker,show,sarcasm,sarcasm_type
0,1_10004,A few months. How long have you been involved ...,1_10004_u,"And of those few months, how long have you bee...",SHELDON,BBT,0.0,NONE
1,1_10009,Ah-da-da-da-da! What the hell?! Excuse me? Tha...,1_10009_u,"Let the dead man talk. So, why do you think that?",PENNY,BBT,0.0,NONE
2,1_1001,It's smashed beyond repair. What are you gonna...,1_1001_u,"What else? Sell it on eBay as ""slightly used.""",RAJ,BBT,0.0,NONE
3,1_1003,I'm gonna go back and try talking to her again...,1_1003_u,"Good idea, sit with her. Hold her, comfort her...",HOWARD,BBT,1.0,PRO
4,1_10190,"Sure. What's up? Leonard, I could use your ass...",1_10190_u,"Well, now that I've given up string theory, I'...",SHELDON,BBT,0.0,NONE
...,...,...,...,...,...,...,...,...
1197,3_S06E02_398,"I mean, he really, really likes Pied Piper. He...",3_S06E02_398_u,"Look, we cannot take blood money.",OTHER,SV,0.0,NONE
1198,3_S06E03_366,Right. Yeah. -we could just buy Hooli. -(laugh...,3_S06E03_366_u,The-the same way we can buy America and everyt...,RICHARD,SV,1.0,PRO
1199,3_S06E05_355,"I was just curious to know, like, what's it li...",3_S06E05_355_u,"Well, maybe some time when you're working on s...",OTHER,SV,1.0,PRO
1200,3_S06E06_143,-Were you gonna tell me about this? -No. You g...,3_S06E06_143_u,I thought that was the company policy-these days.,GILFOYLE,SV,1.0,ILL


In [12]:
all_data.to_csv("text_data_compiled.csv", index = False)