In [1]:
import pandas as pd

# Frame inventory: one row per frame, with the frame label in the "frame" column.
frame_def = pd.read_csv("data/frame_definitions.csv")

# Normalised frame labels (lower-cased, "f:" marker stripped) followed by the
# extra "none" label.
frame_names = [
    label.lower().replace("f:", "") for label in frame_def["frame"]
] + ["none"]

In [2]:
# !pip install CrowdTruth==2.0a1

import crowdtruth
from crowdtruth.configuration import DefaultConfig

import logging
# Configure the root logger at DEBUG level; the INFO:root lines printed by the
# cells below are emitted through it.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)


# Folder handed to crowdtruth.load() as its `directory` argument.
crowd_input_folder = "data/input/"

class FramesConfig(DefaultConfig):
    """CrowdTruth configuration for the frame-selection crowdsourcing task.

    The task is closed: the possible answers are fixed up front and listed in
    ``annotation_vector`` (the normalised frame names plus ``"none"``).
    """

    # Columns of the batch result files that describe the unit shown to workers.
    inputColumns = [
        "Input.word_phrase",
        "Input.frames",
        "Input.sentence",
        "Input.beg",
        "Input.end",
        "Input.vid",
    ]
    # Column holding the workers' answers.
    outputColumns = ["Answer.FrameType"]

    # processing of a closed task
    open_ended_task = False
    annotation_vector = frame_names
    # presumably the delimiter between several answers in one cell -- confirm
    # against the CrowdTruth documentation
    annotation_separator = "|"

    def processJudgments(self, judgments):
        """Normalise the raw answers in every output column.

        Each value is converted to ``str``; the literal 'None of the above.'
        becomes 'none', spaces become underscores, and the result is
        lower-cased.

        Args:
            judgments: table of judgments, indexable by the names in
                ``outputColumns``; the selected columns must support
                ``.apply``.

        Returns:
            The same ``judgments`` object, with its output columns rewritten.
        """
        def normalise(answer):
            text = str(answer).replace('None of the above.','none')
            return text.replace(' ','_').lower()

        for column in self.outputColumns:
            judgments[column] = judgments[column].apply(normalise)
        return judgments

# Instantiate the task configuration and load the batch result files found in
# the input folder; load() returns the judgment data together with a config
# object, and both are passed to crowdtruth.run() afterwards.
fc = FramesConfig()
data, config = crowdtruth.load(directory=crowd_input_folder, config=fc)

INFO:root:Config loaded
INFO:root:Found 2 files
INFO:root:Processing Batch_3109054_batch_results.csv
INFO:root:Values not stored as dictionary
INFO:root:Processing Batch_3110394_batch_results.csv
INFO:root:Values not stored as dictionary


In [3]:
# Compute the CrowdTruth metrics for the loaded judgments. The result is
# indexed by "units", "annotations" and "workers" in the cells below.
# NOTE(review): the log below suggests an iterative computation that stops once
# the reported deltas become small -- confirm against the CrowdTruth docs.
processed_results = crowdtruth.run(data, config)

INFO:root:1 iterations; max d= 0.9999 ; wqs d= 0.637116722097; sqs d= 0.553876039415; rqs d= 0.346151838391
INFO:root:2 iterations; max d= 0.274845851284 ; wqs d= 0.115422564592; sqs d= 0.0942265801302; rqs d= 0.0308102323867
INFO:root:3 iterations; max d= 0.0282217378549 ; wqs d= 0.00817997858609; sqs d= 0.00590239100238; rqs d= 0.00254911545234
INFO:root:4 iterations; max d= 0.0177403859362 ; wqs d= 0.00307066935421; sqs d= 0.00466059867798; rqs d= 0.00144939827222
INFO:root:5 iterations; max d= 0.00433865790349 ; wqs d= 0.000553939577798; sqs d= 0.000235265130929; rqs d= 9.62263419476e-05
INFO:root:6 iterations; max d= 0.00128465931144 ; wqs d= 0.000158662331936; sqs d= 0.000279213994061; rqs d= 8.84442825819e-05
INFO:root:7 iterations; max d= 0.00038281117325 ; wqs d= 4.01703723323e-05; sqs d= 1.19526674372e-05; rqs d= 5.12290499741e-06


In [4]:
import operator

units_table = processed_results["units"]

# Per-unit summary of the frame scores. Every index label is processed, in
# index order, so each list has exactly one entry per unit and lines up with
# units_table.index when wrapped in a Series below.
dlist = []      # per unit: {frame: score} restricted to non-zero scores
max_frame = []  # per unit: best-scoring frame ("" if no frame scored above zero)
max_fss = []    # per unit: score of that frame (0.0 if no frame scored above zero)
for idx in units_table.index:
    # .items() rather than .iteritems() so the cell runs on Python 2 and 3.
    aux_dict = {
        k: v
        for k, v in units_table["unit_annotation_score"][idx].items()
        if v
    }
    dlist.append(aux_dict)
    if aux_dict:
        # One max() call yields both the frame and its score, so they always agree.
        best_frame, best_score = max(aux_dict.items(), key=operator.itemgetter(1))
    else:
        # All scores were zero: max() over an empty dict would raise ValueError.
        best_frame, best_score = "", 0.0
    max_frame.append(best_frame)
    max_fss.append(best_score)

processed_results["units"]["FSS"] = pd.Series(dlist, index=units_table.index)
processed_results["units"]["max_frame"] = pd.Series(max_frame, index=units_table.index)
processed_results["units"]["max_fss"] = pd.Series(max_fss, index=units_table.index)

#processed_results["units"].to_csv("pilot_data_out_sentence_metrics.csv")

In [5]:
# Show the unit table, now including the FSS, max_frame and max_fss columns
# added in the previous cell.
processed_results["units"]

Unnamed: 0_level_0,duration,input.beg,input.end,input.frames,input.sentence,input.vid,input.word_phrase,job,metrics.avg_annotations,metrics.avg_unique_annotations,output.FrameType,output.FrameType.annotations,output.FrameType.unique_annotations,worker,uqs,unit_annotation_score,FSS,max_frame,max_fss
unit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
302U8RURJZDT5JFAPQV82BMQ2R0VNH,505.400000,29,40,"f:Feeling,f:Communication,f:Assessing,f:People...",Their complaint : the system discourages worki...,FNC-0830,discourages,data/input/Batch_3109054_batch_results,26.0,11.0,"{u'cause_to_experience': 0, u'taking_sides': 0...",26,11,15,0.212373,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'none': 0.0337794001024, u'experiencer_obj':...",subjective_influence,0.364921
306W7JMRYYATTYU0E7FPJ22OA4L8B0,281.733333,60,64,"f:Leadership,f:Have_associated,f:Connectors,f:...",Such sights include the colorful `` tams '' --...,FNC-3159,worn,data/input/Batch_3109054_batch_results,22.0,9.0,"{u'cause_to_experience': 0, u'taking_sides': 0...",22,9,15,0.661359,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'wearing': 0.873328112843, u'none': 0.031661...",wearing,0.873328
307L9TDWJY49TADR5GP2JGU7B77N3C,414.666667,126,130,"f:Aggregate,f:Communication,f:Working_on,f:Peo...","In 1844 , American explorer John C. Fremont pa...",FNC-1653,mail,data/input/Batch_3109054_batch_results,17.0,8.0,"{u'cause_to_experience': 0, u'taking_sides': 0...",17,8,15,0.196212,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'sending': 0.111839981488, u'none': 0.069682...",sent_items,0.382031
30EMX9PEVKVC5LD3YIYNZWLF9S1KSW,499.428571,35,42,"f:Means,f:Domain,f:Expertise,f:Fields,f:Gizmo",We 've expanded Goodwill 's proven methods to ...,FNC-1750,methods,data/input/Batch_3109054_batch_results,18.0,5.0,"{u'cause_to_experience': 0, u'taking_sides': 0...",18,5,14,0.364139,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'fields': 0.131354539081, u'none': 0.1001603...",means,0.601209
30EMX9PEVKVC5LD3YIYNZWLF9S1SK4,140.800000,111,119,"f:Communication,f:People,f:Offering,f:Supply,f...","Excalibur , the MGM Grand , the Luxor , and Ne...",FNC-1851,offering,data/input/Batch_3109054_batch_results,20.0,5.0,"{u'cause_to_experience': 0, u'taking_sides': 0...",20,5,15,0.671305,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'communication': 0.0378805729254, u'offering...",offering,0.875459
30IRMPJWDZV046DQJTYI9MGJNQIKR8,243.733333,132,137,"f:Subordinates_and_superiors,f:Cause_change,f:...",Macau 's population is estimated at around 450...,FNC-0894,eased,data/input/Batch_3109054_batch_results,24.0,7.0,"{u'cause_to_experience': 0, u'taking_sides': 0...",24,7,15,0.240837,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'none': 0.182195042468, u'assistance': 0.193...",cause_change,0.475370
30P8I9JKOIXPFU94LQZM8UM4B60V5R,418.800000,103,109,"f:Closure,f:Locative_relation,f:People,f:Body_...",Large shops on the fashionable thoroughfares t...,FNC-2998,tucked,data/input/Batch_3109054_batch_results,19.0,5.0,"{u'cause_to_experience': 0, u'taking_sides': 0...",19,5,15,0.391773,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'closure': 0.076400800802, u'attaching': 0.1...",locative_relation,0.590297
30U1YOGZGA84RF7T61C0VJQPPJ4DSC,376.266667,30,35,"f:Part_ordered_segments,f:Cause_change,f:Remov...",The White House has ordered a purge of informa...,FNC-2327,purge,data/input/Batch_3109054_batch_results,19.0,4.0,"{u'cause_to_experience': 0, u'taking_sides': 0...",19,4,15,0.859433,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'cause_change': 0.34343419441, u'behind_the_...",removing,0.965317
30U1YOGZGA84RF7T61C0VJQPPJ4SDR,236.466667,40,48,"f:Building,f:Cause_to_start,f:Body_movement,f:...",The following should be considered when erecti...,FNC-1054,erecting,data/input/Batch_3109054_batch_results,23.0,6.0,"{u'cause_to_experience': 0, u'taking_sides': 0...",23,6,15,0.455004,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'building': 0.732834609241, u'buildings': 0....",building,0.732835
30Y6N4AHYP8SQ07GRCCV59LT3HLDRO,420.666667,119,123,"f:Create_physical_artwork,f:Closure,f:Connecto...",1- The fence which is built out of bricks or c...,FNC-3134,wire,data/input/Batch_3109054_batch_results,20.0,9.0,"{u'cause_to_experience': 0, u'taking_sides': 0...",20,9,15,0.158922,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'closure': 0.193281481082, u'none': 0.031580...",ingredients,0.346789


In [6]:
units_table = processed_results["units"]
unique_sentences = units_table["input.sentence"]
unique_frames = config.annotation_vector

# Parallel lists with one entry per (unit, candidate frame) pair; they become
# the columns of the frame/sentence score table.
sentences, frames, words, sent_text, fss = [], [], [], [], []

for unit_id in units_table.index:
    # Candidate frames arrive as a comma-separated string such as
    # "f:Feeling,f:Communication"; strip the marker and lower-case each one.
    candidate_frames = [
        raw_name.replace('f:','').lower()
        for raw_name in units_table["input.frames"][unit_id].split(",")
    ]

    # Values shared by every candidate frame of this unit.
    unit_vid = units_table["input.vid"][unit_id]
    unit_word = units_table["input.word_phrase"][unit_id]
    unit_sentence = units_table["input.sentence"][unit_id]
    unit_scores = units_table["FSS"][unit_id]

    for candidate in candidate_frames:
        sentences.append(unit_vid)
        words.append(unit_word)
        sent_text.append(unit_sentence)
        frames.append(candidate)
        # Frames missing from the unit's FSS dict get a score of 0.0.
        fss.append(unit_scores[candidate] if candidate in unit_scores else 0.0)

In [7]:
# One row per (sentence, candidate frame) pair. The column order is given
# explicitly so that the table -- and the CSV written from it -- has the same
# layout regardless of how the installed pandas orders dict keys.
frame_sentence_score = pd.DataFrame(
    {
        "sent_id" : sentences,
        "sentence" : sent_text,
        "word" : words,
        "fss_sent_centric" : fss,
        "frame" : frames
    },
    columns=["frame", "fss_sent_centric", "sent_id", "sentence", "word"]
)

frame_sentence_score

Unnamed: 0,frame,fss_sent_centric,sent_id,sentence,word
0,feeling,0.167718,FNC-0830,Their complaint : the system discourages worki...,discourages
1,communication,0.173959,FNC-0830,Their complaint : the system discourages worki...,discourages
2,assessing,0.087120,FNC-0830,Their complaint : the system discourages worki...,discourages
3,people,0.000000,FNC-0830,Their complaint : the system discourages worki...,discourages
4,attempt_suasion,0.342822,FNC-0830,Their complaint : the system discourages worki...,discourages
5,emotion_directed,0.353178,FNC-0830,Their complaint : the system discourages worki...,discourages
6,text_creation,0.000000,FNC-0830,Their complaint : the system discourages worki...,discourages
7,cogitation,0.087120,FNC-0830,Their complaint : the system discourages worki...,discourages
8,experiencer_obj,0.087120,FNC-0830,Their complaint : the system discourages worki...,discourages
9,communication_manner,0.161175,FNC-0830,Their complaint : the system discourages worki...,discourages


In [9]:
# Frame/sentence scores: written without the index column.
frame_sentence_score.to_csv("data/output/aggregated_FSS.csv", index=False)

# The remaining result tables are written with their index.
for result_key, out_path in [
    ("annotations", "data/output/aggregated_FQS.csv"),
    ("units", "data/output/aggregated_SQS.csv"),
    ("workers", "data/output/aggregated_WQS.csv"),
]:
    processed_results[result_key].to_csv(out_path)