In [61]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.gridspec as gridspec
import matplotlib.patches as patches
from matplotlib.pyplot import cm
import os, json
import pandas as pd
import numpy as np
import seaborn as sns
import math
import numpy as np
from scipy.stats import ttest_ind, ttest_ind_from_stats
from scipy.special import stdtr
from scipy.stats import binom_test
from statsmodels.stats.proportion import proportions_ztest

def get_ranking_representations_in_order(dict):
    ranking_data = dict["ranking"]
    task_order = dict["treatmentOrder"]
    rankings_per_task = []
    for i in range(0,len(ranking_data)):
        if ranking_data[i]["category"] == "rankingRepresentation":
            rankings_per_task.append(ranking_data[i]["ranking"])
    sorted_rankings_by_task = [0] * 3;
    sorted_rankings_by_task[task_order[0]] = rankings_per_task[0]
    sorted_rankings_by_task[task_order[1]] = rankings_per_task[1]
    sorted_rankings_by_task[task_order[2]] = rankings_per_task[2]
    return sorted_rankings_by_task

def get_task_actions_in_order(dict):
    ranking_data = dict["ranking"]
    task_order = dict["treatmentOrder"]
    actions_per_task = []
    for i in range(0,len(ranking_data)):
        if ranking_data[i]["category"] == "rankingRepresentation":
            actions = []
            for j in range(i+1,len(ranking_data)):
                if ranking_data[j]["category"] == "serverEvent" or ranking_data[j]["category"] == "timeStamps":
                    i = j
                    break
                actions.append(ranking_data[j])
            actions_per_task.append(actions)
    sorted_actions_by_task = [0] * 3;
    sorted_actions_by_task[task_order[0]] = actions_per_task[0]
    sorted_actions_by_task[task_order[1]] = actions_per_task[1]
    sorted_actions_by_task[task_order[2]] = actions_per_task[2]
    return sorted_actions_by_task
        
def filter_for_choices(actions, ranking):
    selected_ids = []
    for action in actions:
        if action["eventName"] == "select":
            selected_ids.append(action["itemID"])
        if action["eventName"] == "unselect":
            selected_ids.remove(action["itemID"])
        if action["eventName"] == "fourthElementSelected":
            selected_ids.remove(action["itemID"])
    choices = []
    for id in selected_ids:
        for cand in ranking:
            if id == cand["id"]:
                choices.append(cand)
    return choices

def get_selected_female_candidates(choice_nr, choices, cummulative):
    if choice_nr <=0 or choice_nr>4:
        raise Exception("choice nr has to be in [1;4]")
    selected = 0
    if cummulative:
        for i in range(0,choice_nr):
            if choices[i]["gender"] == 'f':
                selected+=1
    else:
        try:
            if choices[choice_nr-1]["gender"] == 'f':
                selected = 1
        except:
            print(choices)
            print(choice_nr-1)
    return selected

def get_survey_answers(dict):
    survey = dict["survey"]
    answers = []
    for entry in survey:
        if entry["category"] == "surveyQuestionAnswers":
            answers.append(entry)
    return answers

def get_seen_females(seen_candidates):
    seen = 0
    for entry in seen_candidates:
        if entry["gender"] == 'f':
            seen+=1
    return seen

def get_demographics_answers(survey_answers):
    demographics = []
    for answer in survey_answers:
        if "question" in answer and answer["question"] == 6 and len(answer["answers"])>=4:
            for entry in answer["answers"]:
                demographics.append(entry)
    return demographics

def get_user_gender(demographics):
    if demographics[0] == 'somethingElse':
        return demographics[1]
    else:
        return demographics[0].split(' ')[0]

def get_user_age(demographics):
    if len(demographics)>4:
        age_string = demographics[2].split(' ')[0]
    else:
        age_string = demographics[1].split(' ')[0]
    if age_string == 'noAnswer':
        return age_string
    if age_string =='ge55':
        return 60
    age_string_low = age_string[0:2]
    age_string_high = age_string[2:4]
    return (int(age_string_low) + int(age_string_high))/2.0

def get_user_education(demographics):
    if len(demographics)>4:
        edu_string = demographics[3].split(' ')[0]
    else:
        edu_string = demographics[2].split(' ')[0]
    return edu_string

def get_user_income(demographics):
    if len(demographics)>4:
        inc_string = demographics[4].split(' ')[0]
    else:
        inc_string = demographics[3].split(' ')[0]
    if inc_string == 'noAnswer':
        return inc_string
    if inc_string =='120kandMore':
        return 135
    if inc_string == 'less10k':
        return 5
    if inc_string == '90k119k':
        return 104.5
    inc_string_low = inc_string[0:2]
    inc_string_high = inc_string[3:5]
    return (int(inc_string_low) + int(inc_string_high))/2.0

def get_question_trust_answer(survey_answers):
    for answer in survey_answers:
        if "question" in answer and answer["question"] == 1 and len(answer["answers"])==1:
            return int(answer["answers"][0].split(' ')[0])
        elif "question" in answer and answer["question"] == 1 and len(answer["answers"])>1:
            raise Exception("Something went wrong with the Trust answer")

def get_feature_importance_answer_moving(survey_answers):
    answers_moving = []
    for answer in survey_answers:
        if "question" in answer and answer["question"] == 2 and len(answer["answers"])==4:
            for entry in answer["answers"]:
                answers_moving.append(int(entry.split(' ')[0]))
    return answers_moving

def get_feature_importance_answer_event(survey_answers):
    answers_moving = []
    for answer in survey_answers:
        if "question" in answer and answer["question"] == 3 and len(answer["answers"])==4:
            for entry in answer["answers"]:
                answers_moving.append(int(entry.split(' ')[0]))
    return answers_moving

def get_feature_importance_answer_shopping(survey_answers):
    answers_moving = []
    for answer in survey_answers:
        if "question" in answer and answer["question"] == 4 and len(answer["answers"])==4:
            for entry in answer["answers"]:
                answers_moving.append(int(entry.split(' ')[0]))
    return answers_moving

def get_text_field_answer(survey_answers):
    for answer in survey_answers:
        if "question" in answer and answer["question"] == 5 and len(answer["answers"][0])>=40:
            return answer["answers"][0]

def expand_survey_answers(data):
    user_genders = []
    user_incomes = []
    user_education = []
    user_ages = []
    
    trust_answers = []
    
    feature_tasks_moving = []
    feature_reviews_moving = []
    feature_reliable_moving = []
    feature_gender_moving = []
    
    feature_tasks_event = []
    feature_reviews_event = []
    feature_reliable_event = []
    feature_gender_event = []
    
    feature_tasks_shopping = []
    feature_reviews_shopping = []
    feature_reliable_shopping = []
    feature_gender_shopping = []
    
    text_answers = []
    
    for i in range(0,len(data)):
        row = data.loc[i]
        demographics = get_demographics_answers(row["survey_answers"])
        user_genders.append(get_user_gender(demographics))
        user_incomes.append(get_user_income(demographics))
        user_education.append(get_user_education(demographics))
        user_ages.append(get_user_age(demographics))
        
        trust_answers.append(get_question_trust_answer(row["survey_answers"]))
        
        features_moving = get_feature_importance_answer_moving(row["survey_answers"])
        feature_tasks_moving.append(features_moving[0])
        feature_reviews_moving.append(features_moving[1])
        feature_reliable_moving.append(features_moving[2])
        feature_gender_moving.append(features_moving[3])
        
        features_event = get_feature_importance_answer_event(row["survey_answers"])
        feature_tasks_event.append(features_event[0])
        feature_reviews_event.append(features_event[1])
        feature_reliable_event.append(features_event[2])
        feature_gender_event.append(features_event[3])
        
        features_shopping = get_feature_importance_answer_shopping(row["survey_answers"])
        feature_tasks_shopping.append(features_shopping[0])
        feature_reviews_shopping.append(features_shopping[1])
        feature_reliable_shopping.append(features_shopping[2])
        feature_gender_shopping.append(features_shopping[3])
        
        text_answers.append(get_text_field_answer(row["survey_answers"]))
        
    data["user_gender"] = user_genders
    data["user_income"] = user_incomes
    data["user_education"] = user_education
    data["user_age"] = user_ages
    
    data["trust_answer"] = trust_answers
    
    data["feature_tasks_moving"] = feature_tasks_moving
    data["feature_reviews_moving"] = feature_reviews_moving
    data["feature_reliable_moving"] = feature_reliable_moving
    data["feature_gender_moving"] = feature_gender_moving
    
    data["feature_tasks_event"] = feature_tasks_event
    data["feature_reviews_event"] = feature_reviews_event
    data["feature_reliable_event"] = feature_reliable_event
    data["feature_gender_event"] = feature_gender_event
    
    data["feature_tasks_shopping"] = feature_tasks_shopping
    data["feature_reviews_shopping"] = feature_reviews_shopping
    data["feature_reliable_shopping"] = feature_reliable_shopping
    data["feature_gender_shopping"] = feature_gender_shopping
    
    data["text_answer"] = text_answers
    del data["survey_answers"]
    return data

def expand_user_data(data):
    selected_first_choice_moving = []
    selected_second_choice_moving = []
    selected_third_choice_moving = []
    selected_fourth_choice_moving = []
    seen_female_moving = []
    
    selected_first_1_choices_moving = []
    selected_first_2_choices_moving = []
    selected_first_3_choices_moving = []
    selected_first_4_choices_moving = []
    
    selected_first_1_choices_moving_male = []
    selected_first_2_choices_moving_male = []
    selected_first_3_choices_moving_male = []
    selected_first_4_choices_moving_male = []
    
    selected_first_choice_event = []
    selected_second_choice_event = []
    selected_third_choice_event = []
    selected_fourth_choice_event = []
    seen_female_event = []
    
    selected_first_1_choices_event = []
    selected_first_2_choices_event = []
    selected_first_3_choices_event = []
    selected_first_4_choices_event = []
    
    selected_first_1_choices_event_male = []
    selected_first_2_choices_event_male = []
    selected_first_3_choices_event_male = []
    selected_first_4_choices_event_male = []
    
    selected_first_choice_shopping = []
    selected_second_choice_shopping = []
    selected_third_choice_shopping = []
    selected_fourth_choice_shopping = []
    seen_female_shopping = []
    
    selected_first_1_choices_shopping = []
    selected_first_2_choices_shopping = []
    selected_first_3_choices_shopping = []
    selected_first_4_choices_shopping = []
    
    selected_first_1_choices_shopping_male = []
    selected_first_2_choices_shopping_male = []
    selected_first_3_choices_shopping_male = []
    selected_first_4_choices_shopping_male = []
    for i in range(0,len(data)):
        row = data.loc[i]
        if len(row["moving_choices"]) !=4 or len(row["event_choices"]) !=4 or len(row["shopping_choices"]) !=4:
            print(row["uID"])
            
        seen_female_moving.append(get_seen_females(row["moving_ranking"]))
        selected_first_choice_moving.append(get_selected_female_candidates(1, row["moving_choices"], False))
        selected_second_choice_moving.append(get_selected_female_candidates(2, row["moving_choices"], False))
        selected_third_choice_moving.append(get_selected_female_candidates(3, row["moving_choices"], False))
        selected_fourth_choice_moving.append(get_selected_female_candidates(4, row["moving_choices"], False))
        
        a = get_selected_female_candidates(1, row["moving_choices"], True)
        b = get_selected_female_candidates(2, row["moving_choices"], True)
        c = get_selected_female_candidates(3, row["moving_choices"], True)
        d = get_selected_female_candidates(4, row["moving_choices"], True)
        selected_first_1_choices_moving.append(a)
        selected_first_2_choices_moving.append(b)
        selected_first_3_choices_moving.append(c)
        selected_first_4_choices_moving.append(d)
        
        selected_first_1_choices_moving_male.append(1-a)
        selected_first_2_choices_moving_male.append(2-b)
        selected_first_3_choices_moving_male.append(3-c)
        selected_first_4_choices_moving_male.append(4-d)
        
        seen_female_event.append(get_seen_females(row["event_ranking"]))
        selected_first_choice_event.append(get_selected_female_candidates(1, row["event_choices"], False))
        selected_second_choice_event.append(get_selected_female_candidates(2, row["event_choices"], False))
        selected_third_choice_event.append(get_selected_female_candidates(3, row["event_choices"], False))
        selected_fourth_choice_event.append(get_selected_female_candidates(4, row["event_choices"], False))
        
        a = get_selected_female_candidates(1, row["event_choices"], True)
        b = get_selected_female_candidates(2, row["event_choices"], True)
        c = get_selected_female_candidates(3, row["event_choices"], True)
        d = get_selected_female_candidates(4, row["event_choices"], True)
        selected_first_1_choices_event.append(a)
        selected_first_2_choices_event.append(b)
        selected_first_3_choices_event.append(c)
        selected_first_4_choices_event.append(d)
        
        selected_first_1_choices_event_male.append(1-a)
        selected_first_2_choices_event_male.append(2-b)
        selected_first_3_choices_event_male.append(3-c)
        selected_first_4_choices_event_male.append(4-d)
        
        seen_female_shopping.append(get_seen_females(row["shopping_ranking"]))
        selected_first_choice_shopping.append(get_selected_female_candidates(1, row["shopping_choices"], False))
        selected_second_choice_shopping.append(get_selected_female_candidates(2, row["shopping_choices"], False))
        selected_third_choice_shopping.append(get_selected_female_candidates(3, row["shopping_choices"], False))
        selected_fourth_choice_shopping.append(get_selected_female_candidates(4, row["shopping_choices"], False))
        
        a = get_selected_female_candidates(1, row["shopping_choices"], True)
        b = get_selected_female_candidates(2, row["shopping_choices"], True)
        c = get_selected_female_candidates(3, row["shopping_choices"], True)
        d = get_selected_female_candidates(4, row["shopping_choices"], True)
        selected_first_1_choices_shopping.append(a)
        selected_first_2_choices_shopping.append(b)
        selected_first_3_choices_shopping.append(c)
        selected_first_4_choices_shopping.append(d)
        
        selected_first_1_choices_shopping_male.append(1-a)
        selected_first_2_choices_shopping_male.append(2-b)
        selected_first_3_choices_shopping_male.append(3-c)
        selected_first_4_choices_shopping_male.append(4-d)
    
    data["selected_first_choice_moving"] = selected_first_choice_moving
    data["selected_second_choice_moving"] = selected_second_choice_moving
    data["selected_third_choice_moving"] = selected_third_choice_moving
    data["selected_fourth_choice_moving"] = selected_fourth_choice_moving
    
    data["selected_first_1_choices_moving"] = selected_first_1_choices_moving
    data["selected_first_2_choices_moving"] = selected_first_2_choices_moving
    data["selected_first_3_choices_moving"] = selected_first_3_choices_moving
    data["selected_first_4_choices_moving"] = selected_first_4_choices_moving
    
    data["selected_first_1_choices_moving_male"] = selected_first_1_choices_moving_male
    data["selected_first_2_choices_moving_male"] = selected_first_2_choices_moving_male
    data["selected_first_3_choices_moving_male"] = selected_first_3_choices_moving_male
    data["selected_first_4_choices_moving_male"] = selected_first_4_choices_moving_male
    
    data["selected_first_choice_event"] = selected_first_choice_event
    data["selected_second_choice_event"] = selected_second_choice_event
    data["selected_third_choice_event"] = selected_third_choice_event
    data["selected_fourth_choice_event"] = selected_fourth_choice_event
    
    data["selected_first_1_choices_event"] = selected_first_1_choices_event
    data["selected_first_2_choices_event"] = selected_first_2_choices_event
    data["selected_first_3_choices_event"] = selected_first_3_choices_event
    data["selected_first_4_choices_event"] = selected_first_4_choices_event
    
    data["selected_first_1_choices_event_male"] = selected_first_1_choices_event_male
    data["selected_first_2_choices_event_male"] = selected_first_2_choices_event_male
    data["selected_first_3_choices_event_male"] = selected_first_3_choices_event_male
    data["selected_first_4_choices_event_male"] = selected_first_4_choices_event_male
    
    data["selected_first_choice_shopping"] = selected_first_choice_shopping
    data["selected_second_choice_shopping"] = selected_second_choice_shopping
    data["selected_third_choice_shopping"] = selected_third_choice_shopping
    data["selected_fourth_choice_shopping"] = selected_fourth_choice_shopping
    
    data["selected_first_1_choices_shopping"] = selected_first_1_choices_shopping
    data["selected_first_2_choices_shopping"] = selected_first_2_choices_shopping
    data["selected_first_3_choices_shopping"] = selected_first_3_choices_shopping
    data["selected_first_4_choices_shopping"] = selected_first_4_choices_shopping
    
    data["selected_first_1_choices_shopping_male"] = selected_first_1_choices_shopping_male
    data["selected_first_2_choices_shopping_male"] = selected_first_2_choices_shopping_male
    data["selected_first_3_choices_shopping_male"] = selected_first_3_choices_shopping_male
    data["selected_first_4_choices_shopping_male"] = selected_first_4_choices_shopping_male
    
    data["seen_females_moving"] = seen_female_moving
    data["seen_females_event"] = seen_female_event
    data["seen_females_shopping"] = seen_female_shopping
    del data['shopping_choices']
    del data['event_choices']
    del data['moving_choices']
    del data['shopping_ranking']
    del data['event_ranking']
    del data['moving_ranking']
    del data['shopping_actions']
    del data['event_actions']
    del data['moving_actions']
    del data['briefing']
    
    data = expand_survey_answers(data)
    
    return data

def create_candidate_rows(seen,selected,task, other_values):
    rows = []
    for cand in seen:
        cand_id = cand["id"]
        cand_rank = cand["rank"]
        cand_score = cand["score"]
        cand_positive_reviews = cand["tag1"][0:len(cand["tag1"])-1]
        cand_reliability = cand["tag2"]
        cand_tasks_completed = cand["tag3"]
        cand_isFemale = 0
        cand_selected = 0
        cand_priority = 0
        if cand["gender"] == 'f':
            cand_isFemale = 1
        for i in range(0,len(selected)):
            if selected[i]["id"] == cand_id:
                cand_selected = 1
                cand_priority = i+1
        rows.append([task,cand_priority,cand_id,cand_rank,cand_score,cand_positive_reviews,
                    cand_reliability,cand_tasks_completed,cand_isFemale,cand_selected]+other_values)
    return rows
        
def expand_candidate_data(data):
    rows = []
    data = expand_survey_answers(data)
    cols = ['task','cand_priority','cand_id','cand_rank','cand_score','cand_positive_reviews','cand_reliability',
            'cand_tasks_completed','cand_isFemale','cand_selected']+list(data.columns)
    for i in range(0,len(data)):
        other_values = list(data.loc[i])
        moving_seen=data.loc[i]["moving_ranking"]
        moving_selected = data.loc[i]["moving_choices"]
        moving_rows = create_candidate_rows(moving_seen,moving_selected,0,other_values)
        for r in moving_rows:
            rows.append(r)
        event_seen=data.loc[i]["event_ranking"]
        event_selected=data.loc[i]["event_choices"]
        event_rows = create_candidate_rows(event_seen,event_selected,1,other_values)
        for r in event_rows:
            rows.append(r)
        shopping_seen = data.loc[i]["shopping_ranking"]
        shopping_selected = data.loc[i]["shopping_choices"]
        shopping_rows = create_candidate_rows(shopping_seen,shopping_selected,2,other_values)
        for r in shopping_rows:
            rows.append(r)
    df = pd.DataFrame(rows,columns=cols)
    return df

def load_data_as_df():
    path_to_jsonfiles = './Final Data m,e,s'
    cols = ['uID', 'ranking_type','data_order','briefing','task_order','moving_ranking', 'event_ranking', 'shopping_ranking',
               'moving_actions','event_actions','shopping_actions','moving_choices','event_choices',
            'shopping_choices','survey_answers']
    data = pd.DataFrame(columns=cols)
    for file in os.listdir(path_to_jsonfiles):
        full_filename = "%s/%s" % (path_to_jsonfiles, file)
        with open(full_filename,'r') as fi:
            dict = json.load(fi)

            uID = file[0:len(file)-5]
            ranking_type = int(dict["ranking"][0]["itemID"])
            briefing = dict["briefing"]
            task_order = dict["treatmentOrder"]
            task_rankings = get_ranking_representations_in_order(dict)
            task_1_ranking = task_rankings[0]
            task_2_ranking = task_rankings[1]
            task_3_ranking = task_rankings[2]
            task_actions = get_task_actions_in_order(dict)
            task_1_actions = task_actions[0]
            task_2_actions = task_actions[1]
            task_3_actions = task_actions[2]
            task_1_choices = filter_for_choices(task_1_actions, task_1_ranking)
            task_2_choices = filter_for_choices(task_2_actions, task_2_ranking)
            task_3_choices = filter_for_choices(task_3_actions, task_3_ranking)
            survey_answers = get_survey_answers(dict)
            
            data = data.append({'uID': uID, 'ranking_type': ranking_type,'data_order': 0, 'briefing': briefing, 'task_order': task_order,
                               'moving_ranking': task_1_ranking, 'event_ranking': task_2_ranking, 'shopping_ranking': task_3_ranking,
                               'moving_actions': task_1_actions, 'event_actions': task_2_actions, 'shopping_actions': task_3_actions,
                               'moving_choices': task_1_choices, 'event_choices': task_2_choices, 'shopping_choices': task_3_choices,
                               'survey_answers': survey_answers}, ignore_index=True)
            
    path_to_jsonfiles = './Final Data s,m,e'
    for file in os.listdir(path_to_jsonfiles):
        full_filename = "%s/%s" % (path_to_jsonfiles, file)
        with open(full_filename,'r') as fi:
            dict = json.load(fi)
            uID = file[0:len(file)-5]
            ranking_type = int(dict["ranking"][0]["itemID"])
            briefing = dict["briefing"]
            task_order = dict["treatmentOrder"]
            task_rankings = get_ranking_representations_in_order(dict)
            task_1_ranking = task_rankings[0]
            task_2_ranking = task_rankings[1]
            task_3_ranking = task_rankings[2]
            task_actions = get_task_actions_in_order(dict)
            task_1_actions = task_actions[0]
            task_2_actions = task_actions[1]
            task_3_actions = task_actions[2]
            task_1_choices = filter_for_choices(task_1_actions, task_1_ranking)
            task_2_choices = filter_for_choices(task_2_actions, task_2_ranking)
            task_3_choices = filter_for_choices(task_3_actions, task_3_ranking)
            survey_answers = get_survey_answers(dict)
            
            data = data.append({'uID': uID, 'ranking_type': ranking_type,'data_order': 1, 'briefing': briefing, 'task_order': task_order,
                               'moving_ranking': task_1_ranking, 'event_ranking': task_2_ranking, 'shopping_ranking': task_3_ranking,
                               'moving_actions': task_1_actions, 'event_actions': task_2_actions, 'shopping_actions': task_3_actions,
                               'moving_choices': task_1_choices, 'event_choices': task_2_choices, 'shopping_choices': task_3_choices,
                               'survey_answers': survey_answers}, ignore_index=True)
    path_to_jsonfiles = './Final Data e,s,m'
    for file in os.listdir(path_to_jsonfiles):
        full_filename = "%s/%s" % (path_to_jsonfiles, file)
        with open(full_filename,'r') as fi:
            dict = json.load(fi)
            uID = file[0:len(file)-5]
            ranking_type = int(dict["ranking"][0]["itemID"])
            briefing = dict["briefing"]
            task_order = dict["treatmentOrder"]
            task_rankings = get_ranking_representations_in_order(dict)
            task_1_ranking = task_rankings[0]
            task_2_ranking = task_rankings[1]
            task_3_ranking = task_rankings[2]
            task_actions = get_task_actions_in_order(dict)
            task_1_actions = task_actions[0]
            task_2_actions = task_actions[1]
            task_3_actions = task_actions[2]
            task_1_choices = filter_for_choices(task_1_actions, task_1_ranking)
            task_2_choices = filter_for_choices(task_2_actions, task_2_ranking)
            task_3_choices = filter_for_choices(task_3_actions, task_3_ranking)
            survey_answers = get_survey_answers(dict)
            
            data = data.append({'uID': uID, 'ranking_type': ranking_type,'data_order': 2, 'briefing': briefing, 'task_order': task_order,
                               'moving_ranking': task_1_ranking, 'event_ranking': task_2_ranking, 'shopping_ranking': task_3_ranking,
                               'moving_actions': task_1_actions, 'event_actions': task_2_actions, 'shopping_actions': task_3_actions,
                               'moving_choices': task_1_choices, 'event_choices': task_2_choices, 'shopping_choices': task_3_choices,
                               'survey_answers': survey_answers}, ignore_index=True)
    return data


def get_percentage_male_selected_algo_swapped_compare(task, data_types):
    if task == "event":
        order = [1,0,2]
    if task == "moving":
        order = [0,2,1]
    if task == "shopping":
        order = [2,1,0]
    
    if "D1" in data_types:
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[0], 3, male_prop = True) #rabbit
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[0], 1, male_prop = True) #random
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "Random_swapped vs. TaskRabbit_swapped - "+str((i+1))+" choice")
        n_3, y_3, var_3 =get_percentage_selected_algo_compare(task, order[0], 5, male_prop = True) #fair
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_3[i],var_3[i],n_3[i],
                                                  "Fair_swapped vs. TaskRabbit_swapped - "+str((i+1))+" choice")
    if "D2" in data_types:
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[1], 3, male_prop = True) #rabbit
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[1], 1, male_prop = True) #random
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "Random_swapped vs. TaskRabbit_swapped - "+str((i+1))+" choice")
        n_3, y_3, var_3 =get_percentage_selected_algo_compare(task, order[1], 5, male_prop = True) #fair
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_3[i],var_3[i],n_3[i],
                                                  "Fair_swapped vs. TaskRabbit_swapped - "+str((i+1))+" choice")
    
    if "D3" in data_types:
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[2], 3, male_prop = True) #rabbit
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[2], 1, male_prop = True) #random
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "Random_swapped vs. TaskRabbit_swapped - "+str((i+1))+" choice")
        n_3, y_3, var_3 =get_percentage_selected_algo_compare(task, order[2], 5, male_prop = True) #fair
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_3[i],var_3[i],n_3[i],
                                                  "Fair_swapped vs. TaskRabbit_swapped - "+str((i+1))+" choice")

def compare_swapped_and_traditional_first_choices(task, data_types):
    if task == "event":
        order = [1,0,2]
    if task == "moving":
        order = [0,2,1]
    if task == "shopping":
        order = [2,1,0]
    
    if "D1" in data_types:
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[0], 2) #rabbit
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[0], 3, male_prop = True) #rabbit_swapped
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "TaskRabbit_swapped vs. TaskRabbit - "+str((i+1))+" choice")
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[0], 0) #random
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[0], 1, male_prop = True) #random_swapped
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "Random_swapped vs. Random - "+str((i+1))+" choice")
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[0], 4) #random
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[0], 5, male_prop = True) #random_swapped
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "Fair_swapped vs. Fair - "+str((i+1))+" choice")
    
    
    if "D2" in data_types:
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[1], 2) #rabbit
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[1], 3, male_prop = True) #rabbit_swapped
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "TaskRabbit_swapped vs. TaskRabbit - "+str((i+1))+" choice")
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[1], 0) #random
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[1], 1, male_prop = True) #random_swapped
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "Random_swapped vs. Random - "+str((i+1))+" choice")
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[1], 4) #random
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[1], 5, male_prop = True) #random_swapped
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "Fair_swapped vs. Fair - "+str((i+1))+" choice")
    
    if "D3" in data_types:
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[2], 2) #rabbit
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[2], 3, male_prop = True) #rabbit_swapped
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "TaskRabbit_swapped vs. TaskRabbit - "+str((i+1))+" choice")
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[2], 0) #random
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[2], 1, male_prop = True) #random_swapped
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "Random_swapped vs. Random - "+str((i+1))+" choice")
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[2], 4) #random
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[2], 5, male_prop = True) #random_swapped
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "Fair_swapped vs. Fair - "+str((i+1))+" choice")
    
def get_data_order_in_order_for_task(task):
    if task == "event":
        order = [1,0,2]
    if task == "moving":
        order = [0,2,1]
    if task == "shopping":
        order = [2,1,0]
    return order

def get_ranking_type_string_by_number(ranking_type):
    if ranking_type == 0:
        output = "RANDOM(M>W)"
    if ranking_type == 1:
        output = "RANDOM(W>M)"
    if ranking_type == 2:
        output = "TASKRABBIT(M>W)"
    if ranking_type == 3:
        output = "TASKRABBIT(W>M)"
    if ranking_type == 4:
        output = "FAIR(M>W)"
    if ranking_type == 5:
        output = "FAIR(W>M)"
    return output

def get_data_set_name_by_task_and_order(task, order):
    #data_order:
    #0 = Moving-D1 / Event-D2 / Shopping-D3
    #1 = Moving-D3 / Event-D1 / Shopping-D2
    #2 = Moving-D2 / Event-D3 / Shopping-D1
    if task == "event":
        if order == 0:
            return "D2"
        if order == 1:
            return "D1"
        if order == 2:
            return "D3"
    if task == "moving":
        if order == 0:
            return "D1"
        if order == 1:
            return "D3"
        if order == 2:
            return "D2"
    if task == "shopping":
        if order == 0:
            return "D3"
        if order == 1:
            return "D2"
        if order == 2:
            return "D1"
    return "ERROR"

def get_percentage_selected_algo_compare(task, order, ranking_type, male_prop = False):
    data = load_data_as_df()
    data = expand_user_data(data)
    data = data[(data["data_order"] == order)]
    data = data[(data["ranking_type"] == ranking_type)]
    print(task+" - "+get_data_set_name_by_task_and_order(task,order) + " - "+get_ranking_type_string_by_number(ranking_type))
    if male_prop:
        task = task + "_male"
    n_1 = len(data["selected_first_1_choices_"+task])
    y_1 = data["selected_first_1_choices_"+task].mean()
    var_1 = data["selected_first_1_choices_"+task].var()
    
    n_2 = len(data["selected_first_2_choices_"+task]) * 2
    y_2 = data["selected_first_2_choices_"+task].sum()/n_2
    var_2 = data["selected_first_2_choices_"+task].var()
    var_2 = (var_2*n_1)/n_2
    
    n_3 = len(data["selected_first_3_choices_"+task]) * 3
    y_3 = data["selected_first_3_choices_"+task].sum()/n_3
    var_3 = data["selected_first_3_choices_"+task].var()
    var_3 = (var_3*n_1)/n_3
    
    n_4 = len(data["selected_first_4_choices_"+task]) * 4
    y_4 = data["selected_first_4_choices_"+task].sum()/n_4
    var_4 = data["selected_first_4_choices_"+task].var()
    var_4 = (var_4*n_1)/n_4
    if male_prop:
        print("Male-Proportions:")
    else:
        print("Female-Proportions:")
    print("1 choice : "+str(round(y_1,4)))
    print("2 choice : "+str(round(y_2,4)))
    print("3 choice : "+str(round(y_3,4)))
    print("4 choice : "+str(round(y_4,4)))
    
    return [n_1,n_2,n_3,n_4], [y_1,y_2,y_3,y_4], [var_1,var_2,var_3,var_4]

def compare_and_print_p_value_proportions(y_1, var_1, n_1, y_2,var_2,n_2, title):
    diff = round(y_1 - y_2,4)
    abar = y_1
    avar = var_1
    na = n_1
    bbar = y_2
    bvar = var_2
    nb = n_2
    # Use scipy.stats.ttest_ind_from_stats.
    t2, p2 = ttest_ind_from_stats(abar, np.sqrt(avar), na,
                              bbar, np.sqrt(bvar), nb,
                              equal_var=False)
    print(str(y_1) + "vs. " + str(y_2))
    print(title+" Diff: "+ str(abs(diff)) + ", P-value: "+str(round(p2,6)) +" - "+str(n_1)+","+str(n_2))

def get_percentage_female_selected_algo_compare(task, data_types):
    if task == "event":
        order = [1,0,2]
    if task == "moving":
        order = [0,2,1]
    if task == "shopping":
        order = [2,1,0]
    
    if "D1" in data_types:
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[0], 2) #rabbit
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[0], 0) #random
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "Random vs. TaskRabbit - "+str((i+1))+" choice")
        n_3, y_3, var_3 =get_percentage_selected_algo_compare(task, order[0], 4) #fair
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_3[i],var_3[i],n_3[i],
                                                  "Fair vs. TaskRabbit - "+str((i+1))+" choice")
    if "D2" in data_types:
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[1], 2) #rabbit
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[1], 0) #random
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "Random vs. TaskRabbit - "+str((i+1))+" choice")
        n_3, y_3, var_3 =get_percentage_selected_algo_compare(task, order[1], 4) #fair
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_3[i],var_3[i],n_3[i],
                                                  "Fair vs. TaskRabbit - "+str((i+1))+" choice")
    
    if "D3" in data_types:
        n_1, y_1, var_1 = get_percentage_selected_algo_compare(task, order[2], 2) #rabbit
        n_2, y_2, var_2 =get_percentage_selected_algo_compare(task, order[2], 0) #random
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_2[i],var_2[i],n_2[i],
                                                  "Random vs. TaskRabbit - "+str((i+1))+" choice")
        n_3, y_3, var_3 =get_percentage_selected_algo_compare(task, order[2], 4) #fair
        for i in range(0,len(n_1)):
            compare_and_print_p_value_proportions(y_1[i],var_1[i],n_1[i],y_3[i],var_3[i],n_3[i],
                                                  "Fair vs. TaskRabbit - "+str((i+1))+" choice")
    
    
    
    
def get_trust_per_task_dataset_algorithm():
    print("CHECK RANKING TYPES BEFORE USING RESULTS")
    data = load_data_as_df()
    data = expand_user_data(data)
    data = data[(data["ranking_type"] == 3)]
    print("TASKRABBIT")
    print(round(data["trust_answer"].mean(),2))
    print(round(data["trust_answer"].std(),2))
    data = load_data_as_df()
    data = expand_user_data(data)
    data = data[(data["ranking_type"] == 1)]
    print("RANDOM")
    print(round(data["trust_answer"].mean(),2))
    print(round(data["trust_answer"].std(),2))
    data = load_data_as_df()
    data = expand_user_data(data)
    data = data[(data["ranking_type"] == 5)]
    print("FAIR")
    print(round(data["trust_answer"].mean(),2))
    print(round(data["trust_answer"].std(),2))
    
def create_demographic_data_frames(data, norm):
    cols_gender = ["Male", "Female", "Other"]
    cols_age = ["18-24", "25-34", "35-44", "45-54", ">=55"]
    cols_education = ["None", "Highschool", "College", "Master", "PhD"]
    cols_income = ["<10k", "10k-29k", "30k-59k", "60k-89k", "90k-119k", ">=120k"]
    
    females = 0
    #universe_females = max(1,len(universe[(universe["user_gender"] == "female")]))
    males = 0
    #universe_males = max(1,len(universe[(universe["user_gender"] == "male")]))
    others = 0
    #universe_others = max(1,len(universe[(universe["user_gender"] == "somethingElse")]))
    for entry in data["user_gender"]:
        if entry == "female":
            females+=1
        elif entry == "male":
            males+=1
        else:
            others+=1
    df_gender = pd.DataFrame([[males/norm,females/norm,others/norm]],columns=cols_gender)
    
    age_1824 = len(data[(data["user_age"] == 21.0)])/norm
    age_2534 = len(data[(data["user_age"] == 29.5)])/norm
    age_3544 = len(data[(data["user_age"] == 39.5)])/norm
    age_4554 = len(data[(data["user_age"] == 49.5)])/norm
    age_ge55 = len(data[(data["user_age"] == 60.0)])/norm
    df_age = pd.DataFrame([[age_1824,age_2534,age_3544,age_4554,age_ge55]],columns=cols_age)
    
    none = len(data[(data["user_education"] == "none")])/norm
    highschool = len(data[(data["user_education"] == "highschool")])/norm
    college = len(data[(data["user_education"] == "college")])/norm
    master = len(data[(data["user_education"] == "master")])/norm
    phd = len(data[(data["user_education"] == "PhD")])/norm
    df_education = pd.DataFrame([[none,highschool,college,master,phd]],columns=cols_education)
    
    income_less10 = len(data[(data["user_income"] == 5.0)])/norm
    income_1029 = len(data[(data["user_income"] == 19.5)])/norm
    income_3059 = len(data[(data["user_income"] == 44.5)])/norm
    income_6089 = len(data[(data["user_income"] == 74.5)])/norm
    income_90119 = len(data[(data["user_income"] == 104.5)])/norm
    income_ge120 = len(data[(data["user_income"] == 135.0)])/norm
    df_income = pd.DataFrame([[income_less10,income_1029,income_3059,income_6089,income_90119,income_ge120]]
                             ,columns=cols_income)
    
    return df_gender, df_age, df_education, df_income

def plot_user_demographics_by_number_of_females_selected(task, data):
    #0 women
    data_0 = data[(data["selected_first_4_choices_"+task] == 0)]
    df_gender_0, df_age_0, df_education_0, df_income_0 = create_demographic_data_frames(data_0, data)
    
    #1 women
    data_1 = data[(data["selected_first_4_choices_"+task] == 1)]
    df_gender_1, df_age_1, df_education_1, df_income_1 = create_demographic_data_frames(data_1, data)
    
    #2 women
    data_2 = data[(data["selected_first_4_choices_"+task] == 2)]
    df_gender_2, df_age_2, df_education_2, df_income_2 = create_demographic_data_frames(data_2, data)
    
    #3 women
    data_3 = data[(data["selected_first_4_choices_"+task] == 3)]
    df_gender_3, df_age_3, df_education_3, df_income_3 = create_demographic_data_frames(data_3, data)
    
    df_gender_all = pd.DataFrame([list(df_gender_0.loc[0]),list(df_gender_1.loc[0]), list(df_gender_2.loc[0]),
                                  list(df_gender_3.loc[0])],columns=["Male", "Female", "Other"])
    df_age_all = pd.DataFrame([list(df_age_0.loc[0]),list(df_age_1.loc[0]),list(df_age_2.loc[0]),list(df_age_3.loc[0])]
                              , columns = ["18-24", "25-34", "35-44", "45-54", ">=55"])
    df_education_all = pd.DataFrame([list(df_education_0.loc[0]),list(df_education_1.loc[0]),list(df_education_2.loc[0]),list(df_education_3.loc[0])],
                                   columns = ["None", "Highschool", "College", "Master", "PhD"])
    df_income_all = pd.DataFrame([list(df_income_0.loc[0]),list(df_income_1.loc[0]),list(df_income_2.loc[0]),list(df_income_3.loc[0])],
                                columns = ["<10k", "10k-29k", "30k-59k", "60k-89k", "90k-119k", ">=120k"])
    list_bars = []
    #<10k	10k-29k	30k-59k	60k-89k	90k-119k	>=120k
    bars_male = [df_gender_0["Male"][0], df_gender_1["Male"][0], df_gender_2["Male"][0], df_gender_3["Male"][0]]
    bars_female = [df_gender_0["Female"][0], df_gender_1["Female"][0], df_gender_2["Female"][0], df_gender_3["Female"][0]]
    bars_somethingElse = [df_gender_0["Other"][0], df_gender_1["Other"][0], df_gender_2["Other"][0], df_gender_3["Other"][0]]
    
    #list_bars.append(bars_male)
    list_bars.append(bars_female)
    list_bars.append(bars_somethingElse)
    
    bars_1824 = [df_age_0["18-24"][0], df_age_1["18-24"][0], df_age_2["18-24"][0], df_age_3["18-24"][0]]
    bars_2534 = [df_age_0["25-34"][0], df_age_1["25-34"][0], df_age_2["25-34"][0], df_age_3["25-34"][0]]
    bars_3544 = [df_age_0["35-44"][0], df_age_1["35-44"][0], df_age_2["35-44"][0], df_age_3["35-44"][0]]
    bars_4554 = [df_age_0["45-54"][0], df_age_1["45-54"][0], df_age_2["45-54"][0], df_age_3["45-54"][0]]
    bars_ge55 = [df_age_0[">=55"][0], df_age_1[">=55"][0], df_age_2[">=55"][0], df_age_3[">=55"][0]]
    
    list_bars.append(bars_1824)
    list_bars.append(bars_2534)
    list_bars.append(bars_3544)
    list_bars.append(bars_4554)
    list_bars.append(bars_ge55)
    
   # bars_none = [df_education_0["None"][0], df_education_1["None"][0], df_education_2["None"][0], df_education_3["None"][0]]
    #bars_hs = [df_education_0["Highschool"][0], df_education_1["Highschool"][0], df_education_2["Highschool"][0], df_education_3["Highschool"][0]]
    #bars_college = [df_education_0["College"][0], df_education_1["College"][0], df_education_2["College"][0], df_education_3["College"][0]]
    #bars_master = [df_education_0["Master"][0], df_education_1["Master"][0], df_education_2["Master"][0], df_education_3["Master"][0]]
    #bars_phd = [df_education_0["PhD"][0], df_education_1["PhD"][0], df_education_2["PhD"][0], df_education_3["PhD"][0]]
    
    #list_bars.append(bars_none)
    #list_bars.append(bars_hs)
    #list_bars.append(bars_college)
    #list_bars.append(bars_master)
    #list_bars.append(bars_phd)
    
    #bars_10k = [df_income_0["<10k"][0], df_income_1["<10k"][0], df_income_2["<10k"][0], df_income_3["<10k"][0]]
    #bars_1029 = [df_income_0["10k-29k"][0], df_income_1["10k-29k"][0], df_income_2["10k-29k"][0], df_income_3["10k-29k"][0]]
    #bars_3059 = [df_income_0["30k-59k"][0], df_income_1["30k-59k"][0], df_income_2["30k-59k"][0], df_income_3["30k-59k"][0]]
    #bars_6089 = [df_income_0["60k-89k"][0], df_income_1["60k-89k"][0], df_income_2["60k-89k"][0], df_income_3["60k-89k"][0]]
    #bars_90119 = [df_income_0["90k-119k"][0], df_income_1["90k-119k"][0], df_income_2["90k-119k"][0], df_income_3["90k-119k"][0]]
    #bars_120 = [df_income_0[">=120k"][0], df_income_1[">=120k"][0], df_income_2[">=120k"][0], df_income_3[">=120k"][0]]
    
    #list_bars.append(bars_10k)
    #list_bars.append(bars_1029)
    #list_bars.append(bars_3059)
    #list_bars.append(bars_6089)
    #list_bars.append(bars_120)
 
    # Heights of bars1 + bars2
    bars1 = np.add(bars_male, bars_female).tolist()
    bars2 = np.add(bars1, bars_somethingElse).tolist()
    bars3 = np.add(bars2, bars_1824).tolist()
    bars4 = np.add(bars3, bars_2534).tolist()
    bars5 = np.add(bars4, bars_3544).tolist()
    bars6 = np.add(bars5, bars_4554).tolist()
    bars7 = np.add(bars6, bars_ge55).tolist()
    #bars8 = np.add(bars7, bars_none).tolist()
    #bars9 = np.add(bars8, bars_hs).tolist()
    #bars10 = np.add(bars9, bars_college).tolist()
    #bars11 = np.add(bars10, bars_master).tolist()
    #bars12 = np.add(bars11, bars_phd).tolist()
    #bars13 = np.add(bars12, bars_10k).tolist()
    #bars14 = np.add(bars13, bars_1029).tolist()
    #bars15 = np.add(bars14, bars_3059).tolist()
    #bars16 = np.add(bars15, bars_6089).tolist()
    #bars17 = np.add(bars16, bars_90119).tolist()
    #bars18 = np.add(bars17, bars_120).tolist()
 
    list_added_bars = []
    list_added_bars.append(bars1)
    list_added_bars.append(bars2)
    list_added_bars.append(bars3)
    list_added_bars.append(bars4)
    list_added_bars.append(bars5)
    list_added_bars.append(bars6)
    list_added_bars.append(bars7)
    #list_added_bars.append(bars8)
    #list_added_bars.append(bars9)
    #list_added_bars.append(bars10)
    #list_added_bars.append(bars11)
    #list_added_bars.append(bars12)
    #list_added_bars.append(bars13)
    #list_added_bars.append(bars14)
    #list_added_bars.append(bars15)
    #list_added_bars.append(bars16)
    #list_added_bars.append(bars17)
    #list_added_bars.append(bars18)

    # The position of the bars on the x-axis
    r = [0,1,2,3]
    #print(list_added_bars)
    
    # Names of group and bar width
    names = ['0','1','2','3']
    barWidth = 1
    #list_bars.remove(bars_male)
    color=cm.rainbow(np.linspace(0,2,len(list_bars)))
    #plt.bar(r, bars_male, color='b', edgecolor='white', width=barWidth)
    counter= 0
    #for b,c in zip(list_bars,color):
    #    print(b)
    #    plt.bar(r, b, bottom=list_added_bars[counter], color=c, edgecolor='white', width=barWidth)
    #    counter+=1
 
    # Custom X axis
    #plt.xticks([0,1,2,3], ['0','1','2','3'], fontweight='bold')
    #plt.xlabel("selected females")
    return df_gender_all, df_age_all, df_education_all, df_income_all
    # Show graphic
   # plt.show()

def compare_job_contexts(rtypes, male):
    text = ""
    if male:
        text = "_male"
    data = load_data_as_df()
    data = expand_user_data(data)
    data = data[(data["ranking_type"].isin(rtypes))] #all TaskRabbit(M>W)
    print("all algorithms (M>W)")
    n_moving = len(data["selected_first_4_choices_moving"+text])
    y_moving = data["selected_first_4_choices_moving"+text].mean()
    var_moving = data["selected_first_4_choices_moving"+text].var()
    
    n_event = len(data["selected_first_4_choices_event"+text])
    y_event = data["selected_first_4_choices_event"+text].mean()
    var_event = data["selected_first_4_choices_event"+text].var()
    
    n_shopping = len(data["selected_first_4_choices_shopping"+text])
    y_shopping = data["selected_first_4_choices_shopping"+text].mean()
    var_shopping = data["selected_first_4_choices_shopping"+text].var()
    compare_and_print_p_value_proportions(y_moving, var_moving, n_moving, y_event,var_event,n_event,
                                          "Moving vs. Event - All")
    compare_and_print_p_value_proportions(y_event, var_event, n_event, y_shopping,var_shopping,n_shopping,
                                          "Event vs. Shopping - All")
    compare_and_print_p_value_proportions(y_moving, var_moving, n_moving, y_shopping,var_shopping,n_shopping,
                                          "Moving vs. Shopping - All")

In [41]:
#ranking_type: 
#0 = Random (M>F)
#1 = Random_swapped (F>M)
#2 = TaskRabbit (M>F)
#3 = TaskRabbit_swapped (F>M)
#4 = Fair (M>F)
#5 = Fair_swapped (F>M)
#
#data_order:
#0 = Moving-D1 / Event-D2 / Shopping-D3
#1 = Moving-D3 / Event-D1 / Shopping-D2
#2 = Moving-D2 / Event-D3 / Shopping-D1
#
#task_order:
#0 = Moving / 1 = Event / 2 = Shopping

#data = load_data_as_df()
#data = expand_user_data(data)
#data = expand_candidate_data(data)

#rt_0 = len(data[(data["ranking_type"] == 0)])
#rt_1 = len(data[(data["ranking_type"] == 1)])
#rt_2 = len(data[(data["ranking_type"] == 2)])
#rt_3 = len(data[(data["ranking_type"] == 3)])
#rt_4 = len(data[(data["ranking_type"] == 4)])
#rt_5 = len(data[(data["ranking_type"] == 5)])
#print(rt_0)
#print(rt_1)
#print(rt_2)
#print(rt_3)
#print(rt_4)
#print(rt_5)

#Method that plots the selection rates per selected candidate per
#algorithm and dataset(equality)
def get_n_avg_var_for_all_data_sets(rtype,choices, task, male):
    
    data = load_data_as_df()
    data = expand_user_data(data)
    data = data[(data["ranking_type"].isin(rtype) )]
    data = data[(data["data_order"] == 0)]
    n = len(data["selected_first_"+str(choices)+"_choices_"+task])
    n_1 = n*choices
    y_1 = data["selected_first_"+str(choices)+"_choices_"+task].sum()/n_1
    var_1 = data["selected_first_"+str(choices)+"_choices_"+task].var()
    var_1 = (var_1*n)/n_1
    
    data = load_data_as_df()
    data = expand_user_data(data)
    data = data[(data["ranking_type"].isin(rtype))]
    data = data[(data["data_order"] == 1)]
    n = len(data["selected_first_"+str(choices)+"_choices_"+task])
    n_2 = n*choices
    y_2 = data["selected_first_"+str(choices)+"_choices_"+task].sum()/n_2
    var_2 = data["selected_first_"+str(choices)+"_choices_"+task].var()
    var_2 = (var_2*n)/n_2
    
    data = load_data_as_df()
    data = expand_user_data(data)
    data = data[(data["ranking_type"].isin(rtype))]
    data = data[(data["data_order"] == 2)]
    n = len(data["selected_first_"+str(choices)+"_choices_"+task])
    n_3 = n*choices
    y_3 = data["selected_first_"+str(choices)+"_choices_"+task].sum()/n_3
    var_3 = data["selected_first_"+str(choices)+"_choices_"+task].var()
    var_3 = (var_3*n)/n_3
    
    
    
    n = (n_1 + n_2 + n_3)
    y = (y_1 + y_2 + y_3)/3
    var = (var_1 + var_2 + var_3)/3
    return n,y,var
    
def compare_algorithms_for_all_datasets(task, choices, male=False):
    ranking_types = [2,0,4]
    if male:
        ranking_types = [3,1,5]
        task = task+ "_male"
    n_tr,y_tr,var_tr = get_n_avg_var_for_all_data_sets([ranking_types[0]],choices,task,male)
    n_r, y_r, var_r = get_n_avg_var_for_all_data_sets([ranking_types[1]],choices,task,male)
    n_f, y_f , var_f = get_n_avg_var_for_all_data_sets([ranking_types[2]],choices,task,male)
    compare_and_print_p_value_proportions(y_r,var_r,n_r,y_tr,var_tr,n_tr,"Random vs. TaskRabbit - "+str((choices))+" choice")
    compare_and_print_p_value_proportions(y_f,var_f,n_f,y_tr,var_tr,n_tr,"Fair vs. TaskRabbit - "+str((choices))+" choice")
    



In [64]:
data_types = ["D2"]
task = "moving"
choices = 1
#compare_algorithms_for_all_datasets(task,choices)

get_percentage_female_selected_algo_compare(task, data_types)
#print("----------------------------------")
#get_percentage_male_selected_algo_swapped_compare(task, data_types)
#print("----------------------------------")
#compare_algorithms_for_all_datasets(task, 3)
#compare_swapped_and_traditional_first_choices(task,data_types)

moving - D2 - TASKRABBIT(M>W)
Female-Proportions:
1 choice : 0.2459
2 choice : 0.3934
3 choice : 0.4426
4 choice : 0.4139
moving - D2 - RANDOM(M>W)
Female-Proportions:
1 choice : 0.4426
2 choice : 0.4836
3 choice : 0.5301
4 choice : 0.4631
0.2459016393442623vs. 0.4426229508196721
Random vs. TaskRabbit - 1 choice Diff: 0.1967, P-value: 0.022177 - 61,61
0.39344262295081966vs. 0.48360655737704916
Random vs. TaskRabbit - 2 choice Diff: 0.0902, P-value: 0.040911 - 122,122
0.4426229508196721vs. 0.5300546448087432
Random vs. TaskRabbit - 3 choice Diff: 0.0874, P-value: 0.038232 - 183,183
0.4139344262295082vs. 0.46311475409836067
Random vs. TaskRabbit - 4 choice Diff: 0.0492, P-value: 0.084654 - 244,244
moving - D2 - FAIR(M>W)
Female-Proportions:
1 choice : 0.6066
2 choice : 0.4672
3 choice : 0.5191
4 choice : 0.4549
0.2459016393442623vs. 0.6065573770491803
Fair vs. TaskRabbit - 1 choice Diff: 0.3607, P-value: 3.7e-05 - 61,61
0.39344262295081966vs. 0.4672131147540984
Fair vs. TaskRabbit - 2 ch

In [54]:
task = "shopping"
choices = 1
compare_algorithms_for_all_datasets(task,choices,True)


0.24308713790469474vs. 0.16139359698681735
Random vs. TaskRabbit - 1 choice Diff: 0.0817, P-value: 0.030139 - 183,179
0.2566024451348516vs. 0.16139359698681735
Fair vs. TaskRabbit - 1 choice Diff: 0.0952, P-value: 0.017268 - 174,179


In [55]:
task = "shopping"
choices = 2
compare_algorithms_for_all_datasets(task,choices,True)

0.2644603090663376vs. 0.2004237288135593
Random vs. TaskRabbit - 2 choice Diff: 0.064, P-value: 0.004873 - 366,358
0.28539190856549124vs. 0.2004237288135593
Fair vs. TaskRabbit - 2 choice Diff: 0.085, P-value: 0.000639 - 348,358


In [56]:
task = "shopping"
choices = 3
compare_algorithms_for_all_datasets(task,choices,True)

0.3027968740819085vs. 0.24500941619585684
Random vs. TaskRabbit - 3 choice Diff: 0.0578, P-value: 0.003161 - 549,537
0.302456413779432vs. 0.24500941619585684
Fair vs. TaskRabbit - 3 choice Diff: 0.0574, P-value: 0.004539 - 522,537


In [57]:
task = "shopping"
choices = 4
compare_algorithms_for_all_datasets(task,choices,True)

0.2857607673776368vs. 0.256379472693032
Random vs. TaskRabbit - 4 choice Diff: 0.0294, P-value: 0.059315 - 732,716
0.28717560265362413vs. 0.256379472693032
Fair vs. TaskRabbit - 4 choice Diff: 0.0308, P-value: 0.045672 - 696,716


In [63]:
rtypes = [2,0,4]
male = True
compare_job_contexts(rtypes, male)

all algorithms (M>W)
1.1585820895522387vs. 1.0615671641791045
Moving vs. Event - All Diff: 0.097, P-value: 0.051882 - 536,536
1.0615671641791045vs. 1.1063432835820894
Event vs. Shopping - All Diff: 0.0448, P-value: 0.359418 - 536,536
1.1585820895522387vs. 1.1063432835820894
Moving vs. Shopping - All Diff: 0.0522, P-value: 0.304824 - 536,536
