<img src="quality_match.png" width="200" height="200" align="right">

## Quality Match Task - Sherif Shawkat

In [1]:
#import libraries
import pandas as pd
import json
import matplotlib.pyplot as plt

In [2]:
# load the reference labels and preview the first rows
references_df = pd.read_json(path_or_buf='references.json')
references_df.head(n=5)

Unnamed: 0,img_4686,img_8607,img_5541,img_3218,img_3247,img_1876,img_6228,img_4653,img_5488,img_8591,...,img_3563,img_7393,img_7061,img_6877,img_2192,img_5282,img_0628,img_7736,img_1042,img_2866
is_bicycle,False,True,False,False,True,True,True,False,True,False,...,True,False,False,True,True,True,True,True,False,True


In [3]:
# first attempt: let pandas parse the anonymized project file directly
anonymized_df = pd.read_json(path_or_buf='anonymized_project.json')
anonymized_df.head(n=5)

Unnamed: 0,results
root_node,"{'gui_type': 'discrete_answer', 'results': {'7..."


It seems that anonymized_project.json needs some cleaning before it can be loaded as a flat table.

In [4]:
# re-read the anonymized project file as raw JSON;
# the context manager closes the file handle as soon as parsing is done
with open('anonymized_project.json') as f:
    data = json.load(f)

In [5]:
# the per-node payload sits under results -> root_node
data_refined = data['results']['root_node']
anonymized_df = pd.DataFrame(data_refined)
anonymized_df.head(n=5)

Unnamed: 0,gui_type,results
000e9703-686d-45c1-9772-9edb38ed2891,discrete_answer,{'results': [{'task_input': {'image_url': 'htt...
000eac10-afe2-4f3f-b4ed-c79b8cfbdcc1,discrete_answer,{'results': [{'task_input': {'image_url': 'htt...
0012d625-0d84-4ae9-973f-21acf84eab54,discrete_answer,{'results': [{'task_input': {'image_url': 'htt...
001460c2-712d-45d2-8afe-cdac237196a2,discrete_answer,{'results': [{'task_input': {'image_url': 'htt...
001873e5-920a-41e5-9a9c-386412b728b6,discrete_answer,{'results': [{'task_input': {'image_url': 'htt...


In [6]:
# empty frame that will collect the flattened records of anonymized_project.json
flat_columns = [
    'task_input', 'created_at', 'workpackage_total_size', 'loss',
    'project_node_input_id', 'project_node_output_id', 'task_output',
    'user', 'root_input', 'project_root_node_input_id', 'id', 'gui_type',
]
anonymized_clean_df = pd.DataFrame(columns=flat_columns)

In [7]:
#method to extract data from each row
def func(row):
    """Flatten one project node and add its records to ``anonymized_clean_df``.

    Parameters
    ----------
    row : pandas.Series
        One row of ``anonymized_df``. ``row.results['results']`` is read as the
        collection of annotation records, ``row.gui_type`` as the node's GUI
        type and ``row.name`` (the row label) as the node id.

    Returns
    -------
    None
        The module-level ``anonymized_clean_df`` is rebound to a new frame
        holding the previous rows followed by this node's records, with a
        fresh 0..n-1 index.
    """
    global anonymized_clean_df
    results_df = pd.DataFrame.from_dict(row.results['results'])
    # tag every record with the node it came from
    results_df['id'] = row.name
    results_df['gui_type'] = row.gui_type
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement, and ignore_index=True renumbers the rows 0..n-1
    anonymized_clean_df = pd.concat([anonymized_clean_df, results_df], ignore_index=True)

In [None]:
# run func once per row of the anonymized dataframe (row-wise apply)
anonymized_df.apply(func, axis='columns')

In [None]:
# preview the flattened records
anonymized_clean_df.head(n=5)

In [None]:
# expand the dictionaries stored in "user" and "task_output" into their own,
# prefixed columns, then drop the original dictionary columns
user_columns = pd.DataFrame(anonymized_clean_df['user'].tolist()).rename(
    columns={
        "vendor_id": "user_vendor_id",
        "id": "user_id",
        "vendor_user_id": "user_vendor_user_id",
    }
)
task_output_columns = pd.DataFrame(anonymized_clean_df['task_output'].tolist()).rename(
    columns={
        "answer": "task_output_answer",
        "cant_solve": "task_output_cant_solve",
        "corrupt_data": "task_output_corrupt_data",
        "duration_ms": "task_output_duration_ms",
    }
)
anonymized_clean_df = (
    anonymized_clean_df
    .join(user_columns)
    .join(task_output_columns)
    .drop(['user', 'task_output'], axis=1)
)
anonymized_clean_df.head()

In [None]:
# extract the image name from the last 12 characters of the image url
# NOTE(review): the fixed 12-character slice assumes file names shaped like
# "img_NNNN.jpg" — confirm against the data.
# str.strip('.jpg') removes any of the characters '.', 'j', 'p', 'g' from both
# ends rather than the extension, so the suffix is removed with an anchored
# pattern instead.
anonymized_clean_df['image_name'] = (
    anonymized_clean_df['root_input']
    .apply(lambda x: x.get('image_url'))
    .str[-12:]
    .str.replace(r'\.jpg$', '', regex=True)
)

In [None]:
# flip the references so each image is a row, and expose the row label
# as an "image_name" column
references_df = references_df.T.assign(image_name=lambda frame: frame.index)

In [None]:
# inner-join the annotations with the reference labels on the image name
merged_df = pd.merge(anonymized_clean_df, references_df, how='inner', on='image_name')
merged_df.head(n=5)

### Task 1

In [None]:
######################### A ################################

# count the distinct annotators that contributed to the dataset
merged_df['user_id'].nunique(dropna=True)

In [None]:
######################## B ##################################

# summary statistics of the annotation durations (in ms)
duration_ms = merged_df['task_output_duration_ms']
duration_ms.describe()

It seems the duration column contains invalid (negative) values, so we keep only the rows with a strictly positive duration.

In [None]:
# keep only strictly positive durations (drops negative and zero values),
# then get the statistics; .copy() makes merged_df an independent frame so
# that assigning new columns to it later does not raise SettingWithCopyWarning
merged_df = merged_df[merged_df['task_output_duration_ms'] > 0].copy()
merged_df['task_output_duration_ms'].describe()

In [None]:
############# C ##################

# number of annotations per user, largest first
annotations_per_user = merged_df.groupby('user_id').size()
annotations_per_user.sort_values(ascending=False)

In [None]:
############# D ################################


# create a new column "prediction" to compare the user answer with the ground truth:
# an answer is "correct" when it agrees with the reference label, otherwise "wrong"
answered_yes = merged_df['task_output_answer'] == 'yes'
answered_no = merged_df['task_output_answer'] == 'no'
agrees_with_reference = (
    ((merged_df['is_bicycle'] == True) & answered_yes)
    | ((merged_df['is_bicycle'] == False) & answered_no)
)

# assign the whole column in one step; the previous
# merged_df['prediction'].fillna(..., inplace=True) is chained in-place
# assignment, which warns in pandas 2.x and has no effect under copy-on-write
merged_df['prediction'] = agrees_with_reference.map({True: 'correct', False: 'wrong'})

# for unsolvable tasks, the prediction is "no_answer"
merged_df.loc[merged_df['task_output_cant_solve'] == True, "prediction"] = "no_answer"

# create column for image url
merged_df['img_url'] = merged_df['task_input'].apply(lambda x: x.get('image_url'))

In [None]:
# count the answers per (image url, answer) pair, then pivot the answers into
# columns so every image has one row; missing combinations become 0
answer_counts = merged_df.groupby(['img_url', 'task_output_answer']).size()
task_answer_grouped = answer_counts.unstack(fill_value=0)

In [None]:
# rank images by how split the annotators were: a small yes/no gap means high disagreement
# NOTE(review): dividing by 10 presumably assumes 10 answers per image — confirm
vote_gap = (task_answer_grouped['yes'] - task_answer_grouped['no']).abs()
task_answer_grouped['percentage'] = vote_gap / 10
task_answer_grouped.sort_values(by='percentage')

### Task 2

In [None]:
# rows flagged as corrupt data also count as "no_answer"
corrupt_mask = merged_df['task_output_corrupt_data'] == True
merged_df.loc[corrupt_mask, "prediction"] = "no_answer"

In [None]:
# share of rows flagged as "can't solve" (as a fraction of all rows)
cant_solve_rows = merged_df[merged_df['task_output_cant_solve'] == True]
len(cant_solve_rows) / len(merged_df)

In [None]:
# share of rows flagged as corrupt data (as a fraction of all rows)
corrupt_rows = merged_df[merged_df['task_output_corrupt_data'] == True]
len(corrupt_rows) / len(merged_df)

The "can't solve" flag occurs in 0.018% of the rows in the dataframe, while the "corrupt data" flag occurs in 0.0033% of the rows.

In [None]:
# number of tasks per user that were flagged as corrupt or unsolvable
unsolved_mask = (
    (merged_df['task_output_corrupt_data'] == True)
    | (merged_df['task_output_cant_solve'] == True)
)
merged_df[unsolved_mask].groupby('user_id').size()

### Task 3

In [None]:
# number of reference images per is_bicycle value
reference_counts = references_df.groupby('is_bicycle').count()
reference_counts.reset_index()

In [None]:
# bar chart of the number of reference images per is_bicycle value
label_counts = references_df.groupby('is_bicycle').count().reset_index()
ax = label_counts.plot.bar(x='is_bicycle', y='image_name')
ax.legend(["count"]);

It seems the reference set is balanced

### Task 4

In [None]:
# count the predictions of every annotator, one column per prediction label
annotators_predictions = (
    merged_df
    .groupby(['user_id', 'prediction'])
    .size()
    .unstack(fill_value=0)
)

# accuracy (in %) = correct answers over all of the annotator's tasks
tasks_per_annotator = annotators_predictions[['correct', 'wrong', 'no_answer']].sum(axis=1)
annotators_predictions['accuracy'] = annotators_predictions['correct'] * 100 / tasks_per_annotator

# show the annotators from most to least accurate
annotators_predictions.sort_values(by='accuracy', ascending=False)

The best annotator has an accuracy of 94.8%, the worst annotator has an accuracy of 88.8%

In [None]:
# bar chart of the annotators' accuracy, most accurate first;
# the y axis is limited to 80-100 to make the differences visible
accuracy_sorted = annotators_predictions.reset_index().sort_values(by='accuracy', ascending=False)
ax = accuracy_sorted.plot.bar(x='user_id', y='accuracy')
ax.set_ylim(80, 100);