# Read data

In [1]:
import pandas as pd

# Load the frame definitions table; its "frame" column holds the frame labels.
frame_def = pd.read_csv("data/frame_definitions.csv")

# Lower-case every frame label and remove the "f:" marker, then add the extra
# "none" label as the last entry of the list.
frame_names = [
    raw_label.lower().replace("f:", "") for raw_label in frame_def["frame"]
] + ["none"]

In [2]:
# !pip install crowdtruth

import crowdtruth
from crowdtruth.configuration import DefaultConfig

import logging
# Set the root logger to DEBUG so that messages of every level are emitted
# while the data is loaded and the metrics are computed.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)


# Folder handed to crowdtruth.load() below as the `directory` argument.
crowd_input_folder = "data/input/"

class FramesConfig(DefaultConfig):
    """CrowdTruth configuration for the frame annotation task.

    Declares which columns of the input files are task inputs and which hold
    the worker answers, marks the task as closed-ended with `frame_names` as
    the annotation vector, and normalises the raw answers before processing.
    """

    inputColumns = [
        "Input.word_phrase",
        "Input.frames",
        "Input.sentence",
        "Input.beg",
        "Input.end",
        "Input.vid",
    ]
    outputColumns = ["Answer.FrameType"]

    # Closed task: answers are drawn from the fixed list of frame names.
    open_ended_task = False
    annotation_vector = frame_names
    annotation_separator = "|"

    def processJudgments(self, judgments):
        """Normalise the answer columns of `judgments` and return it.

        Every value in each output column is converted to a string, the
        literal 'None of the above.' is replaced by 'none', spaces become
        underscores, and the result is lower-cased. The columns are
        overwritten on the object that was passed in.
        """
        def normalise(answer):
            # Same three steps, in the same order, applied in a single pass.
            text = str(answer).replace('None of the above.','none')
            return text.replace(' ','_').lower()

        for answer_col in self.outputColumns:
            judgments[answer_col] = judgments[answer_col].apply(normalise)
        return judgments

# Instantiate the task configuration and load everything found in the input
# folder; the call returns the loaded data together with the configuration
# object that is passed on to crowdtruth.run().
fc = FramesConfig()
data, config = crowdtruth.load(directory=crowd_input_folder, config=fc)

INFO:root:Config loaded
INFO:root:Found 2 files
INFO:root:Processing Batch_3109054_batch_results.csv
INFO:root:Values not stored as dictionary
  units = units.reindex_axis(sorted(units.columns), axis=1)
  job = job.reindex_axis(sorted(job.columns), axis=1)
INFO:root:Processing Batch_3110394_batch_results.csv
INFO:root:Values not stored as dictionary


# Run CrowdTruth metrics

In [3]:
# Run the CrowdTruth metrics on the loaded data. Later cells read the "units",
# "annotations" and "workers" entries of the returned object.
processed_results = crowdtruth.run(data, config)

INFO:root:Using experimental version a2
  judgments.set_value(index, col, expandedVector(row[col], units.at[row['unit'], col]))
INFO:root:1 iterations; max d= 0.9999 ; wqs d= 0.660383410344; sqs d= 0.569290521806; rqs d= 0.346151838391
INFO:root:2 iterations; max d= 0.274672673243 ; wqs d= 0.136631618727; sqs d= 0.105936167348; rqs d= 0.0301795497187
INFO:root:3 iterations; max d= 0.0402298033932 ; wqs d= 0.00844655300082; sqs d= 0.00478026127785; rqs d= 0.00199383661514
INFO:root:4 iterations; max d= 0.0199262147214 ; wqs d= 0.00281295297744; sqs d= 0.00509890500271; rqs d= 0.00156463335605
INFO:root:5 iterations; max d= 0.00546546807128 ; wqs d= 0.000701303463544; sqs d= 0.000281348944458; rqs d= 9.3437371847e-05
INFO:root:6 iterations; max d= 0.00156982155691 ; wqs d= 0.000181610761728; sqs d= 0.00034760782813; rqs d= 0.000111223247025
INFO:root:7 iterations; max d= 0.000520335382856 ; wqs d= 5.97969879876e-05; sqs d= 2.75424275487e-05; rqs d= 7.47517554927e-06


In [4]:
import operator

# For every unit derive three values from its "unit_annotation_score" mapping:
#   FSS       - the mapping restricted to frames with a non-zero score
#   max_frame - the frame with the highest score ("" when there is none)
#   max_fss   - that highest score (0.0 when there is none)
unit_results = processed_results["units"]

dlist = []
max_frame = []
max_fss = []
for idx in unit_results.index:
    if idx == "unit_annotation_score":
        # This label is not treated as a unit. Emit placeholders *in place*
        # so the three lists always stay aligned with the index; skipping the
        # row would make the Series assignments below fail on length.
        aux_dict = {}
    else:
        # .items() instead of .iteritems(): works on Python 2 and Python 3.
        aux_dict = {
            k: v
            for k, v in unit_results["unit_annotation_score"][idx].items()
            if v
        }
    dlist.append(aux_dict)

    if aux_dict:
        best_frame, best_score = max(aux_dict.items(), key=operator.itemgetter(1))
    else:
        # max() raises ValueError on an empty mapping (unit without any
        # non-zero score), so fall back to explicit placeholders.
        best_frame, best_score = "", 0.0
    max_frame.append(best_frame)
    max_fss.append(best_score)

unit_results["FSS"] = pd.Series(dlist, index=unit_results.index)
unit_results["max_frame"] = pd.Series(max_frame, index=unit_results.index)
unit_results["max_fss"] = pd.Series(max_fss, index=unit_results.index)

#processed_results["units"].to_csv("pilot_data_out_sentence_metrics.csv")

# Output results

In [5]:
# Preview the first rows of the per-unit results, including the FSS,
# max_frame and max_fss columns added in the previous cell.
processed_results["units"].head()

Unnamed: 0_level_0,duration,input.beg,input.end,input.frames,input.sentence,input.vid,input.word_phrase,job,output.FrameType,output.FrameType.annotations,output.FrameType.unique_annotations,worker,uqs,unit_annotation_score,uqs_initial,unit_annotation_score_initial,FSS,max_frame,max_fss
unit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
302U8RURJZDT5JFAPQV82BMQ2R0VNH,505.4,29,40,"f:Feeling,f:Communication,f:Assessing,f:People...",Their complaint : the system discourages worki...,FNC-0830,discourages,data/input//Batch_3109054_batch_results,"{u'cause_to_experience': 0, u'taking_sides': 0...",26,11,15,0.21237,"{u'cause_to_experience': 0.0, u'taking_sides':...",0.170398,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'none': 0.0337792797442, u'experiencer_obj':...",subjective_influence,0.364921
306W7JMRYYATTYU0E7FPJ22OA4L8B0,281.733333,60,64,"f:Leadership,f:Have_associated,f:Connectors,f:...",Such sights include the colorful `` tams '' --...,FNC-3159,worn,data/input//Batch_3109054_batch_results,"{u'cause_to_experience': 0, u'taking_sides': 0...",22,9,15,0.661335,"{u'cause_to_experience': 0.0, u'taking_sides':...",0.436241,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'wearing': 0.87332556013, u'none': 0.0316614...",wearing,0.873326
307L9TDWJY49TADR5GP2JGU7B77N3C,414.666667,126,130,"f:Aggregate,f:Communication,f:Working_on,f:Peo...","In 1844 , American explorer John C. Fremont pa...",FNC-1653,mail,data/input//Batch_3109054_batch_results,"{u'cause_to_experience': 0, u'taking_sides': 0...",17,8,15,0.196213,"{u'cause_to_experience': 0.0, u'taking_sides':...",0.174895,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'sending': 0.111839341344, u'none': 0.069681...",sent_items,0.38203
30EMX9PEVKVC5LD3YIYNZWLF9S1KSW,499.428571,35,42,"f:Means,f:Domain,f:Expertise,f:Fields,f:Gizmo",We 've expanded Goodwill 's proven methods to ...,FNC-1750,methods,data/input//Batch_3109054_batch_results,"{u'cause_to_experience': 0, u'taking_sides': 0...",18,5,14,0.364124,"{u'cause_to_experience': 0.0, u'taking_sides':...",0.263741,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'fields': 0.131356281396, u'none': 0.1001609...",means,0.601206
30EMX9PEVKVC5LD3YIYNZWLF9S1SK4,140.8,111,119,"f:Communication,f:People,f:Offering,f:Supply,f...","Excalibur , the MGM Grand , the Luxor , and Ne...",FNC-1851,offering,data/input//Batch_3109054_batch_results,"{u'cause_to_experience': 0, u'taking_sides': 0...",20,5,15,0.671293,"{u'cause_to_experience': 0.0, u'taking_sides':...",0.546183,"{u'cause_to_experience': 0.0, u'taking_sides':...","{u'communication': 0.0378829079711, u'offering...",offering,0.875457


In [6]:
unique_sentences = processed_results["units"]["input.sentence"]
unique_frames = config.annotation_vector

# Parallel lists, one entry per (unit, candidate frame) pair.
sentences, frames, words, sent_text, fss = [], [], [], [], []

unit_table = processed_results["units"]
for idx in unit_table.index:
    # Values shared by every candidate frame of this unit.
    unit_vid = unit_table["input.vid"][idx]
    unit_word = unit_table["input.word_phrase"][idx]
    unit_sentence = unit_table["input.sentence"][idx]
    unit_scores = unit_table["FSS"][idx]

    # Candidate frames are comma-separated; drop the 'f:' marker, lower-case.
    candidates = [
        raw_frame.replace('f:','').lower()
        for raw_frame in unit_table["input.frames"][idx].split(",")
    ]
    for candidate in candidates:
        sentences.append(unit_vid)
        words.append(unit_word)
        sent_text.append(unit_sentence)
        frames.append(candidate)
        # Frames absent from the unit's FSS mapping get a score of 0.0.
        fss.append(unit_scores.get(candidate, 0.0))

In [9]:
# Assemble the parallel lists into one long-format table with one row per
# (sentence, candidate frame) pair.
fss_columns = {
    "sent_id" : sentences,
    "sentence" : sent_text,
    "word" : words,
    "fss_sent_centric" : fss,
    "frame" : frames
}
frame_sentence_score = pd.DataFrame(fss_columns)

#frame_sentence_score.set_index([frame_sentence_score["sent_id"], frame_sentence_score["frame"]])
frame_sentence_score.head()

Unnamed: 0,frame,fss_sent_centric,sent_id,sentence,word
0,feeling,0.167718,FNC-0830,Their complaint : the system discourages worki...,discourages
1,communication,0.173959,FNC-0830,Their complaint : the system discourages worki...,discourages
2,assessing,0.08712,FNC-0830,Their complaint : the system discourages worki...,discourages
3,people,0.0,FNC-0830,Their complaint : the system discourages worki...,discourages
4,attempt_suasion,0.34282,FNC-0830,Their complaint : the system discourages worki...,discourages


In [9]:
# Write the frame-sentence scores without the row index.
frame_sentence_score.to_csv("data/output/aggregated_FSS.csv", index = False)

# Write the annotation, unit and worker result tables, keeping their indexes.
for result_key, csv_path in (
    ("annotations", "data/output/aggregated_FQS.csv"),
    ("units", "data/output/aggregated_SQS.csv"),
    ("workers", "data/output/aggregated_WQS.csv"),
):
    processed_results[result_key].to_csv(csv_path)