In [1]:
import numpy as np
import pandas as pd
import json
from collections import Counter

In [2]:
invalid_counts = {}

In [3]:
KEY1 = "A1"
KEY2 = "A2"

## Data Paths

In [4]:
user_data_path = "data/study/user_data.csv"
data_paths = {
    "response_data" : {
        KEY1 : "data/study/response_data_{}.csv".format(KEY1),
        KEY2 : "data/study/response_data_{}.csv".format(KEY2)
    },
    "demographics" : {
        KEY1 : "data/study/demographics_data_{}.csv".format(KEY1),
        KEY2 : "data/study/demographics_data_{}.csv".format(KEY2)
    }
}

# Join Dataframes for subsets

In [5]:
datasets = [KEY1, KEY2]
response_data_header = ["User ID", "Question Number", "Question", "Breed Key", "Image Name", "User Response", "Correct Response", "Response Time"]
response_data_dfs_raw = []
for dataset_id in datasets:
    df_raw = pd.read_csv(data_paths["response_data"][dataset_id], header=None)
    
    # Set header
    df_raw.columns = response_data_header
    
    # Join A1
    df_processed = df_raw.join(pd.DataFrame({'Dataset': [dataset_id for _ in range(len(df_raw))]}))
    
    if response_data_dfs_raw != []:
        previous_length = len(response_data_dfs_raw[-1])
        current_length = len(df_processed)
        new_upper_bound = previous_length + current_length
        df_processed.index = [i for i in range(previous_length, new_upper_bound)]        
    
    # Store
    response_data_dfs_raw.append(df_processed)

response_data_dfs = pd.concat(response_data_dfs_raw, axis=0)
response_data_dfs

Unnamed: 0,User ID,Question Number,Question,Breed Key,Image Name,User Response,Correct Response,Response Time,Dataset
0,21,0,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/dog_B__142__dr2.png,B,True,6639,A1
1,21,1,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/dog_B__166__dr10.png,B,True,3171,A1
2,21,2,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/dog_A__152__dr2.png,A,True,1259,A1
3,21,3,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/dog_B__24__dr5.png,B,True,1122,A1
4,21,4,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/dog_B__96__dr7.png,B,True,1320,A1
5,21,5,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/dog_B__91__dr10.png,B,True,1014,A1
6,21,6,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/dog_A__76__dr7.png,A,True,1018,A1
7,21,7,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/dog_A__103__dr2.png,A,True,1049,A1
8,21,8,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/dog_B__26__dr2.png,B,True,906,A1
9,21,9,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/dog_A__164__dr2.png,A,True,1144,A1


# Update Image Name

In [6]:
new_image_names = []
for index, row in response_data_dfs.iterrows():
    row_breed_key = row["Breed Key"]
    row_image_name = row["Image Name"]
    row_breed_key = row_breed_key.strip().split("\n")
    row_breed_dict = { ele.split(":")[0].strip() : ele.split(":")[1].strip() for ele in row_breed_key}

    if "dog_A" in row_image_name:
        new_image_name = row_image_name.replace("dog_A", row_breed_dict["dog_A"])
    else:
        new_image_name = row_image_name.replace("dog_B", row_breed_dict["dog_B"])
    
    new_image_names.append(new_image_name)

In [7]:
response_data_dfs["Image Name"] = new_image_names

In [8]:
response_data_dfs.head()

Unnamed: 0,User ID,Question Number,Question,Breed Key,Image Name,User Response,Correct Response,Response Time,Dataset
0,21,0,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/saint_bernard__142__dr2...,B,True,6639,A1
1,21,1,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/saint_bernard__166__dr1...,B,True,3171,A1
2,21,2,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/basset_hound__152__dr2.png,A,True,1259,A1
3,21,3,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/saint_bernard__24__dr5.png,B,True,1122,A1
4,21,4,Is this image Dog Breed A or not Dog Breed A?,dog_A : basset_hound\r\ndog_B : saint_bernard\r\n,static/imgs/study/dogs/saint_bernard__96__dr7.png,B,True,1320,A1


## Check image counts

In [9]:
counts = Counter(response_data_dfs["Image Name"])
average_count_per_image = np.mean(counts.values())
print("Average count per image: {}".format(average_count_per_image))

Average count per image: 22.5


## Remove Bad Users

In [10]:
user_IDs = set(response_data_dfs["User ID"])

invalid_users = {}
valid_user_ids = []
for user_id in user_IDs:
    
    user_is_valid = True
    fail_reasons = []
    user_df = response_data_dfs[response_data_dfs["User ID"] == user_id]
    
    # CHECK NUMBER OF USER ENTRIES! Ensure 103
    # ========================================================
    num_user_entries = len(user_df)
    
    if num_user_entries < 103:
        fail_reason = "User {} does not have 103 entries - they have {}.".format(user_id, num_user_entries)
        fail_reasons.append(fail_reason)
        user_is_valid = False
        
    elif num_user_entries == 206:
        fail_reason = "User {} has 206 entries! Readjusting dataframe.".format(user_id)
        
        unique_sets = set(user_df["Dataset"])
        if unique_sets == 2:
            keep_set = datasets[np.random.randint(2)]
            user_df = user_df[user_df["Dataset"] == keep_set]
        else:
            user_df = user_df[:103]
    # ========================================================
    
    # CHECK THAT USERS DID NOT ENTER ONLY 1 ANSWER: e.g., all B's
    # ========================================================
    user_responses = user_df["User Response"]
    unique_responses = set(user_responses)
    
    if len(unique_responses) == 1:
        fail_reason = "User {} only entered {}'s".format(user_id, list(unique_responses)[0])
        fail_reasons.append(fail_reason)
        user_is_valid = False
    # ========================================================
    
    # CHECK THAT USERS PASSED ATTENTION CHECKS
    # ========================================================
    attention_check_threshold = 2
    attention_check_correct_responses = user_df[user_df["Image Name"].str.contains('attention_check')]["Correct Response"]
    attention_checks_passed = len(np.where(attention_check_correct_responses == True)[0])
    if attention_checks_passed < attention_check_threshold:
        fail_reason = "User {} only passed {}/4 attention checks!".format(user_id, attention_checks_passed)
        fail_reasons.append(fail_reason)
        user_is_valid = False
    # ========================================================
    
    if not user_is_valid:
        invalid_users[user_id] = {"reasons" : fail_reasons}
    else:
        valid_user_ids.append(user_id)
   
invalid_user_ids = invalid_users.keys()

num_total_users = len(user_IDs)
print("Total users: {}".format(num_total_users))

print("\n")

print("Invalid Users")
print("-------------")
for user_i, (invalid_user_id, val) in enumerate(invalid_users.items(), 1):
    print("\t{}. User {} -- Reasons: {}".format(user_i, invalid_user_id, val["reasons"][0]))

print("\n")

print("Valid Users")
print("-----------")
for valid_i, valid_user_id in enumerate(valid_user_ids):
    print("\t{:2.0f}. User {}".format(valid_i, valid_user_id))

Total users: 31


Invalid Users
-------------
	1. User 66 -- Reasons: User 66 only passed 0/4 attention checks!
	2. User 3 -- Reasons: User 3 only passed 0/4 attention checks!
	3. User 37 -- Reasons: User 37 only passed 0/4 attention checks!
	4. User 7 -- Reasons: User 7 only passed 0/4 attention checks!
	5. User 8 -- Reasons: User 8 only passed 0/4 attention checks!
	6. User 9 -- Reasons: User 9 only passed 1/4 attention checks!
	7. User 48 -- Reasons: User 48 only passed 0/4 attention checks!
	8. User 17 -- Reasons: User 17 only passed 0/4 attention checks!
	9. User 52 -- Reasons: User 52 only passed 0/4 attention checks!
	10. User 53 -- Reasons: User 53 only passed 0/4 attention checks!
	11. User 57 -- Reasons: User 57 only passed 0/4 attention checks!


Valid Users
-----------
	 0. User 18
	 1. User 21
	 2. User 22
	 3. User 23
	 4. User 26
	 5. User 30
	 6. User 32
	 7. User 34
	 8. User 40
	 9. User 43
	10. User 44
	11. User 49
	12. User 50
	13. User 55
	14. User 56
	15. User 60


In [11]:
set_ = "Set{}".format(KEY1[0])
invalid_counts[set_] = 20 - len(valid_user_ids)

In [12]:
invalid_counts

{'SetA': 0}

## Get correctness of images

In [13]:
# Unique Image list
unique_image_list = set(response_data_dfs["Image Name"])

# Remove attention checks
downsampled_image_list = [image for image in unique_image_list if "attention_check" not in image]

full_data = {}
for dog_image in downsampled_image_list:

    full_data[dog_image] = {}
    
    # Get dog DF
    dog_df = response_data_dfs[response_data_dfs["Image Name"] == dog_image]
    
    # Users associated with current image
    user_df = list(dog_df["User ID"])
    
    # Get valid indices for df
    valid_indices = []
    for user_i, user_id in enumerate(user_df):
        if user_id in valid_user_ids:
            valid_indices.append(user_i)
            
    # Get observations for current image only from valid users
    valid_images_df = dog_df.iloc[valid_indices]

    # Count number of images in df
    valid_image_count = len(valid_images_df["Correct Response"])
       
    # Setup breed key and inverted breed key
    # ==========================================================================================
    breed_key = list(valid_images_df["Breed Key"])[0]
    breed_key = [breed.strip() for breed in breed_key.split("\n") if breed.strip() != ""]
    breed_key = {breed.split(" : ")[0] : breed.split(" : ")[1] for breed in breed_key}
    breed_key_inv = { val : key for key, val in breed_key.items()}
    # ==========================================================================================
    
    # Get correctness
    correct_count = len(valid_images_df[valid_images_df["Correct Response"] == True])
    correctness = correct_count / float(valid_image_count)
    full_data[dog_image]["correctness"] = correctness
    
    # Get confidence
    # First, find majority vote and label
    label_counts = Counter(valid_images_df["User Response"])
    
    user_response_1 = valid_images_df["User Response"].iloc[0]
    correct_response_1 = valid_images_df["Correct Response"].iloc[0]
    
    if not correct_response_1:
        user_response_1 = "B" if user_response_1 == "A" else "A"
    actual_label = "dog_{}".format(user_response_1)
    actual_label = breed_key[actual_label]
    
    majority_label = "dog_A" if label_counts["A"] > label_counts["B"] else "dog_B"
    majority_label = breed_key[majority_label]
    
    
    confidence = label_counts["A"] / float(valid_image_count) if label_counts["A"] > label_counts["B"] else label_counts["B"] / float(valid_image_count)
    
    # update data struct
    full_data[dog_image]["confidence"] = {
        "actual_label" : actual_label, 
        "majority_label" : majority_label, 
        "value" : confidence
    }
    
    

In [14]:
data = full_data

In [15]:
data

{'static/imgs/study/dogs/basset_hound__103__dr10.png': {'confidence': {'actual_label': 'basset_hound',
   'majority_label': 'basset_hound',
   'value': 0.5217391304347826},
  'correctness': 0.5217391304347826},
 'static/imgs/study/dogs/basset_hound__103__dr2.png': {'confidence': {'actual_label': 'basset_hound',
   'majority_label': 'basset_hound',
   'value': 0.9130434782608695},
  'correctness': 0.9130434782608695},
 'static/imgs/study/dogs/basset_hound__103__dr5.png': {'confidence': {'actual_label': 'basset_hound',
   'majority_label': 'basset_hound',
   'value': 0.7391304347826086},
  'correctness': 0.7391304347826086},
 'static/imgs/study/dogs/basset_hound__103__dr7.png': {'confidence': {'actual_label': 'basset_hound',
   'majority_label': 'basset_hound',
   'value': 0.6086956521739131},
  'correctness': 0.6086956521739131},
 'static/imgs/study/dogs/basset_hound__111__dr10.png': {'confidence': {'actual_label': 'basset_hound',
   'majority_label': 'saint_bernard',
   'value': 0.5652

# Policy Generation

# Policies
 1. Random (basically interspersed)
 2. All low confidence (correctness interspersed)
 3. All high confidence (correctness interspersed)
 4. Low to high confidence
 5. High to low confidence

### High / Low Confidence Determined Empirically
    - Take the median

In [16]:
get_median_confidence = lambda x : np.mean([i["confidence"]["value"] for i in x.values()])

In [17]:
confidence_threshold = get_median_confidence(data)
confidence_threshold

0.8096894409937887

In [18]:
policy_data = {}

### Policy 1: Random

In [19]:
policy_1_raw = data.items()
np.random.shuffle(policy_1_raw)
policy_1 = []
for key, val in policy_1_raw:
    new_data = (key, {
        "confidence" : val["confidence"]["value"], 
        "correctness" : val["correctness"],
        "majority_label" : val["confidence"]["majority_label"]})
    policy_1.append(new_data)
print("Shuffled List")
policy_1[:5]

Shuffled List


[('static/imgs/study/dogs/saint_bernard__18__dr7.png',
  {'confidence': 1.0, 'correctness': 1.0, 'majority_label': 'saint_bernard'}),
 ('static/imgs/study/dogs/basset_hound__149__dr2.png',
  {'confidence': 0.8695652173913043,
   'correctness': 0.8695652173913043,
   'majority_label': 'basset_hound'}),
 ('static/imgs/study/dogs/basset_hound__53__dr5.png',
  {'confidence': 0.8571428571428571,
   'correctness': 0.8571428571428571,
   'majority_label': 'basset_hound'}),
 ('static/imgs/study/dogs/saint_bernard__171__dr5.png',
  {'confidence': 0.9130434782608695,
   'correctness': 0.9130434782608695,
   'majority_label': 'saint_bernard'}),
 ('static/imgs/study/dogs/basset_hound__39__dr7.png',
  {'confidence': 0.782608695652174,
   'correctness': 0.782608695652174,
   'majority_label': 'basset_hound'})]

In [20]:
policy_data["policy_1"] = policy_1

### Policy 2: All low confidence (correctness interspersed)

In [21]:
policy_2 = []
num_query_images = 0
for key, val in data.items():
    if val["confidence"]["value"] < confidence_threshold:
        query_this_point = True
        num_query_images += 1
    else:
        query_this_point = False
        
    new_data = (key, {
        "confidence" : val["confidence"]["value"], 
        "correctness" : val["correctness"],
        "majority_label" : val["confidence"]["majority_label"],
        "query" : query_this_point})
    policy_2.append(new_data)

np.random.shuffle(policy_2)
policy_2[:4]

print(num_query_images)

83


In [22]:
policy_data["policy_2"] = policy_2

### Policy 3: All high confidence (correctness interspersed)

In [23]:
policy_3 = []
for key, val in data.items():
    if val["confidence"]["value"] >= confidence_threshold:
        query_this_point = True
    else:
        query_this_point = False
        
    new_data = (key, {
        "confidence" : val["confidence"]["value"], 
        "correctness" : val["correctness"],
        "majority_label" : val["confidence"]["majority_label"],
        "query" : query_this_point})
    policy_3.append(new_data)

np.random.shuffle(policy_3)
policy_3[:4]

[('static/imgs/study/dogs/basset_hound__123__dr7.png',
  {'confidence': 0.7142857142857143,
   'correctness': 0.7142857142857143,
   'majority_label': 'basset_hound',
   'query': False}),
 ('static/imgs/study/dogs/basset_hound__149__dr10.png',
  {'confidence': 0.7391304347826086,
   'correctness': 0.7391304347826086,
   'majority_label': 'basset_hound',
   'query': False}),
 ('static/imgs/study/dogs/basset_hound__103__dr2.png',
  {'confidence': 0.9130434782608695,
   'correctness': 0.9130434782608695,
   'majority_label': 'basset_hound',
   'query': True}),
 ('static/imgs/study/dogs/basset_hound__152__dr5.png',
  {'confidence': 0.782608695652174,
   'correctness': 0.782608695652174,
   'majority_label': 'basset_hound',
   'query': False})]

In [24]:
policy_data["policy_3"] = policy_3

### Policy 4: Low to high confidence

In [25]:
key_dict = data.keys()
dtype=[('a', float), ('b', float), ('c', 'U25'), ('d', int)]

policy_4 = np.array([(val["confidence"]["value"], val["correctness"], val["confidence"]["majority_label"], key_dict.index(key)) for key, val in data.items()], dtype=dtype)
policy_4 = np.sort(policy_4, order=['a'])
policy_4 = [(key_dict[i[3]], {"confidence" : i[0], "correctness" : i[1], "majority_label" : str(i[2])}) for i in policy_4]
for i, ele in enumerate(policy_4):
    if i > 4: break
    print(ele)


('static/imgs/study/dogs/basset_hound__76__dr10.png', {'majority_label': 'saint_bernard', 'confidence': 0.5217391304347826, 'correctness': 0.4782608695652174})
('static/imgs/study/dogs/basset_hound__103__dr10.png', {'majority_label': 'basset_hound', 'confidence': 0.5217391304347826, 'correctness': 0.5217391304347826})
('static/imgs/study/dogs/basset_hound__137__dr7.png', {'majority_label': 'basset_hound', 'confidence': 0.5217391304347826, 'correctness': 0.5217391304347826})
('static/imgs/study/dogs/basset_hound__90__dr10.png', {'majority_label': 'saint_bernard', 'confidence': 0.5652173913043478, 'correctness': 0.43478260869565216})
('static/imgs/study/dogs/basset_hound__111__dr10.png', {'majority_label': 'saint_bernard', 'confidence': 0.5652173913043478, 'correctness': 0.43478260869565216})


In [26]:
policy_data["policy_4"] = policy_4

### Policy 5: High to low confidence

In [27]:
policy_5 = policy_4[::-1]
for i, ele in enumerate(policy_5):
    if i > 4: break
    print(ele)

('static/imgs/study/dogs/saint_bernard__116__dr2.png', {'majority_label': 'saint_bernard', 'confidence': 1.0, 'correctness': 1.0})
('static/imgs/study/dogs/saint_bernard__50__dr10.png', {'majority_label': 'saint_bernard', 'confidence': 1.0, 'correctness': 1.0})
('static/imgs/study/dogs/saint_bernard__53__dr2.png', {'majority_label': 'saint_bernard', 'confidence': 1.0, 'correctness': 1.0})
('static/imgs/study/dogs/saint_bernard__18__dr7.png', {'majority_label': 'saint_bernard', 'confidence': 1.0, 'correctness': 1.0})
('static/imgs/study/dogs/saint_bernard__129__dr2.png', {'majority_label': 'saint_bernard', 'confidence': 1.0, 'correctness': 1.0})


In [28]:
policy_data["policy_5"] = policy_5

### Write Data

In [29]:
study_out = "Set{}".format(KEY1[0])
study_out

'SetA'

In [30]:
with open("../2_experiment_primary/studies/{}/static/data/study_1_full_data.json".format(study_out), "w") as outfile:
    json.dump(data, outfile)

In [31]:
with open("../2_experiment_primary/studies/{}/static/data/study_1_results.json".format(study_out), "w") as outfile:
    json.dump(policy_data, outfile)