# Data Exclusion
Experimental data is excluded below based on the following criteria:
1. A subject is excluded if they self-report to have not understood the task instructions in the post-task questionnaire
2. A musical set (one of the test conditions each subject interacts with) is excluded from a subject's data if there's insufficient responses to perform statistics on the data (fewer than 10 responses)

## Imports

In [25]:
import math

import seaborn as sns
import StudyII_All_5_note_Sets.paths as StudyII_paths
import StudyI_Pentatonic_vs_Chromatic.paths as StudyI_paths
import Study_Likert.paths as Study_likert_paths
import Study_Uniform.paths as Study_uniform_paths
import Study_Tritave_Uniform.paths as Study_tritave_uniform_paths
import Study_Tritave_pentachords.paths as Study_tritave_pentachords_paths
import pandas as pd

In [26]:
studyI_GL = pd.read_csv(StudyI_paths.processed_dir + 'group_level_results.csv')
studyII_GL = pd.read_csv(StudyII_paths.processed_dir + 'group_level_results.csv')
study_uniform = pd.read_csv(Study_uniform_paths.processed_dir + 'group_level_results.csv')
study_tritave_uniform = pd.read_csv(Study_tritave_uniform_paths.processed_dir + 'group_level_results.csv')
study_tritave_pentachords = pd.read_csv(Study_tritave_pentachords_paths.processed_dir + 'group_level_results.csv')
decoys = pd.read_csv(StudyII_paths.processed_dir + 'group_level_decoy_results.csv')
study_likert = pd.read_csv(Study_likert_paths.processed_dir + 'group_level_results.csv')

## Statistics scripts (do not change data, but report the impact of the exclusion on the data)

In [27]:
# Standard Error of the Mean
def get_SEM(pentatonic_only=True):
    temp = studyII_GL
    if(pentatonic_only):
        temp = temp[temp['set']=="0 2 4 7 9"]
    SEM = temp.groupby("set")['rate shifted - rate swapped (NN)'].sem().reset_index()['rate shifted - rate swapped (NN)'].median()
    return SEM

# Standard Deviation
def get_STD(pentatonic_only=True):
    temp = studyII_GL
    if(pentatonic_only):
        temp = temp[temp['set']=="0 2 4 7 9"]
    STD = temp.groupby("set")['rate shifted - rate swapped (NN)'].std().reset_index()['rate shifted - rate swapped (NN)'].median()
    return STD

# Mean
def get_MEAN(pentatonic_only=True):
    temp = studyII_GL
    if(pentatonic_only):
        temp = temp[temp['set']=="0 2 4 7 9"]
    MEAN = temp.groupby("set")['rate shifted - rate swapped (NN)'].mean().reset_index()['rate shifted - rate swapped (NN)'].median()
    return MEAN

## Total number of subjects

In [28]:
tasks_before_removal = studyI_GL.groupby('subject').count().shape[0]
print("There are {} tasks in total prior to exclusion".format(tasks_before_removal))

There are 102 tasks in total prior to exclusion


## Exclude subjects who "didn't understand" the task
### Study I

In [29]:
#Study I: remove subjects that selected "didn't understand the study"
temp = studyI_GL
tasks_before_removal = studyI_GL.groupby('subject').count().shape[0]
subjects_before_removal = studyI_GL.groupby('sona').count().shape[0]
studyI_GL = studyI_GL[studyI_GL['understood task']==True]
tasks_after_removal = studyI_GL.groupby('subject').count().shape[0]
subjects_after_removal = studyI_GL.groupby('sona').count().shape[0]
print("Removed {} tasks".format(tasks_before_removal-tasks_after_removal))
print("Removed {} subjects".format(subjects_before_removal-subjects_after_removal))

Removed 4 tasks
Removed 4 subjects


### Study II

In [30]:
tasks_before_removal = studyII_GL.groupby('sona').count().shape[0]
print("There are {} subjects in total prior to exclusion".format(tasks_before_removal))

There are 742 subjects in total prior to exclusion


In [31]:
#Study II: remove subjects that selected "didn't understand the study"
tasks_before_removal = studyII_GL.groupby('subject').count().shape[0]
subjects_before_removal = studyII_GL.groupby('sona').count().shape[0]
studyII_GL = studyII_GL[studyII_GL['understood task']==True]
tasks_after_removal = studyII_GL.groupby('subject').count().shape[0]
subjects_after_removal = studyII_GL.groupby('sona').count().shape[0]
print("There are {} tasks in total prior to exclusion".format(tasks_before_removal))
print("Removed {} tasks".format(tasks_before_removal-tasks_after_removal))
print("There are {} subjects in total prior to exclusion".format(subjects_before_removal))
print("Removed {} subjects".format(subjects_before_removal-subjects_after_removal))


There are 843 tasks in total prior to exclusion
Removed 51 tasks
There are 742 subjects in total prior to exclusion
Removed 44 subjects


### Likert Experiment

In [32]:
#Likert: remove subjects that selected "didn't understand the study"
subjects_before_removal = study_likert.drop_duplicates(subset=['sona','subject'], keep='last').count()[0]
study_likert = study_likert[study_likert['understood task']==True]
subjects_after_removal = study_likert.drop_duplicates(subset=['sona','subject'], keep='last').count()[0]
print("There are {} subjects in total prior to exclusion".format(subjects_before_removal))
print("Removed {} subjects".format(subjects_before_removal-subjects_after_removal))

There are 157 subjects in total prior to exclusion
Removed 10 subjects


### Uniform Set Experiment

In [33]:
#Uniform: remove subjects that selected "didn't understand the study"
tasks_before_removal = study_uniform.groupby('subject').count().shape[0]
subjects_before_removal = study_uniform.groupby('sona').count().shape[0]
study_uniform = study_uniform[study_uniform['understood task']==True]
tasks_after_removal = study_uniform.groupby('subject').count().shape[0]
subjects_after_removal = study_uniform.groupby('sona').count().shape[0]
print("There are {} tasks in total prior to exclusion".format(tasks_before_removal))
print("There are {} subjects in total prior to exclusion".format(subjects_before_removal))
print("Removed {} tasks".format(tasks_before_removal-tasks_after_removal))
print("Removed {} subjects".format(subjects_before_removal-subjects_after_removal))

There are 57 tasks in total prior to exclusion
There are 57 subjects in total prior to exclusion
Removed 9 tasks
Removed 9 subjects


### Tritave Uniform Set Experiment

In [34]:
#Tritave-Uniform: remove subjects that selected "didn't understand the study"
tasks_before_removal = study_tritave_uniform.groupby('subject').count().shape[0]
subjects_before_removal = study_tritave_uniform.groupby('sona').count().shape[0]
study_tritave_uniform = study_tritave_uniform[study_tritave_uniform['understood task']==True]
tasks_after_removal = study_tritave_uniform.groupby('subject').count().shape[0]
subjects_after_removal = study_tritave_uniform.groupby('sona').count().shape[0]
print("There are {} tasks in total prior to exclusion".format(tasks_before_removal))
print("There are {} subjects in total prior to exclusion".format(subjects_before_removal))
print("Removed {} tasks".format(tasks_before_removal-tasks_after_removal))
print("Removed {} subjects".format(subjects_before_removal-subjects_after_removal))

There are 29 tasks in total prior to exclusion
There are 29 subjects in total prior to exclusion
Removed 2 tasks
Removed 2 subjects


### Tritave Pentachord Set Experiment

In [35]:
#Tritave-Pentachord: remove subjects that selected "didn't understand the study"
tasks_before_removal = study_tritave_pentachords.groupby('subject').count().shape[0]
subjects_before_removal = study_tritave_pentachords.groupby('sona').count().shape[0]
study_tritave_pentachords = study_tritave_pentachords[study_tritave_pentachords['understood task']==True]
tasks_after_removal = study_tritave_pentachords.groupby('subject').count().shape[0]
subjects_after_removal = study_tritave_pentachords.groupby('sona').count().shape[0]
print("There are {} tasks in total prior to exclusion".format(tasks_before_removal))
print("There are {} subjects in total prior to exclusion".format(subjects_before_removal))
print("Removed {} tasks".format(tasks_before_removal-tasks_after_removal))
print("Removed {} subjects".format(subjects_before_removal-subjects_after_removal))

There are 40 tasks in total prior to exclusion
There are 40 subjects in total prior to exclusion
Removed 5 tasks
Removed 5 subjects


## Exclude sets with too few responses
### Study I

In [36]:
min_responses = 15

In [37]:
#Study I: remove sets with fewer than 15 no-neither responses
tasks_before_removal = studyI_GL.groupby('subject').count().shape[0]
subjects_before_removal = studyI_GL.groupby('sona').count().shape[0]
studyI_GL = studyI_GL[studyI_GL['# no_neither_trials']>=min_responses].reset_index()
tasks_after_removal = studyI_GL.groupby('subject').count().shape[0]
subjects_after_removal = studyI_GL.groupby('sona').count().shape[0]
print("Removed {} tasks".format(tasks_before_removal-tasks_after_removal))
print("Removed {} subjects".format(subjects_before_removal-subjects_after_removal))

Removed 0 tasks
Removed 0 subjects


### Study II

In [38]:
#Study II: remove sets with fewer than 15 no-neither responses
tasks_before_removal = studyII_GL.groupby('subject').count().shape[0]
subjects_before_removal = studyII_GL.groupby('sona').count().shape[0]
studyII_GL = studyII_GL[studyII_GL['# no_neither_trials']>=min_responses].reset_index()
print("There are {} tasks in total prior to exclusion".format(tasks_before_removal))
print("Removed {} tasks".format(tasks_before_removal-tasks_after_removal))
tasks_after_removal = studyII_GL.groupby('subject').count().shape[0]
subjects_after_removal = studyII_GL.groupby('sona').count().shape[0]
print("There are {} subjects in total prior to exclusion".format(subjects_before_removal))
print("Removed {} subjects".format(subjects_before_removal-subjects_after_removal))

There are 792 tasks in total prior to exclusion
Removed 694 tasks
There are 698 subjects in total prior to exclusion
Removed 130 subjects


### Uniform Set Experiment

In [39]:
#Study Uniform Sets: remove sets with fewer than 15 no-neither responses
tasks_before_removal = study_uniform.groupby('subject').count().shape[0]
subjects_before_removal = study_uniform.groupby('sona').count().shape[0]
study_uniform = study_uniform[study_uniform['# no_neither_trials']>=min_responses].reset_index()
tasks_after_removal = study_uniform.groupby('subject').count().shape[0]
print("There are {} tasks in total prior to exclusion".format(tasks_before_removal))
print("Removed {} tasks".format(tasks_before_removal-tasks_after_removal))
subjects_after_removal = study_uniform.groupby('sona').count().shape[0]
print("There are {} subjects in total prior to exclusion".format(subjects_before_removal))
print("Removed {} subjects".format(subjects_before_removal-subjects_after_removal))

There are 48 tasks in total prior to exclusion
Removed 1 tasks
There are 48 subjects in total prior to exclusion
Removed 1 subjects


### Tritave Uniform Set Experiment

In [40]:
#Study Tritave Uniform Sets: remove sets with fewer than 15 no-neither responses
tasks_before_removal = study_tritave_uniform.groupby('subject').count().shape[0]
subjects_before_removal = study_tritave_uniform.groupby('sona').count().shape[0]
study_tritave_uniform = study_tritave_uniform[study_tritave_uniform['# no_neither_trials']>=min_responses].reset_index()
tasks_after_removal = study_tritave_uniform.groupby('subject').count().shape[0]
print("There are {} tasks in total prior to exclusion".format(tasks_before_removal))
print("Removed {} tasks".format(tasks_before_removal-tasks_after_removal))
subjects_after_removal = study_tritave_uniform.groupby('sona').count().shape[0]
print("There are {} subjects in total prior to exclusion".format(subjects_before_removal))
print("Removed {} subjects".format(subjects_before_removal-subjects_after_removal))

There are 27 tasks in total prior to exclusion
Removed 0 tasks
There are 27 subjects in total prior to exclusion
Removed 0 subjects


### Tritave Pentachord Set Experiment

In [41]:
#Study Tritave Pentachord Sets: remove sets with fewer than 15 no-neither responses
tasks_before_removal = study_tritave_pentachords.groupby('subject').count().shape[0]
subjects_before_removal = study_tritave_pentachords.groupby('sona').count().shape[0]
study_tritave_pentachords = study_tritave_pentachords[study_tritave_pentachords['# no_neither_trials']>=min_responses].reset_index()
tasks_after_removal = study_tritave_pentachords.groupby('subject').count().shape[0]
print("There are {} tasks in total prior to exclusion".format(tasks_before_removal))
print("Removed {} tasks".format(tasks_before_removal-tasks_after_removal))
subjects_after_removal = study_tritave_pentachords.groupby('sona').count().shape[0]
print("There are {} subjects in total prior to exclusion".format(subjects_before_removal))
print("Removed {} subjects".format(subjects_before_removal-subjects_after_removal))

There are 35 tasks in total prior to exclusion
Removed 0 tasks
There are 35 subjects in total prior to exclusion
Removed 0 subjects


In [42]:
study1_subject_counts = studyI_GL.groupby('sona').count()
study1_task_counts = studyI_GL.groupby('subject').count()
study2_subject_counts = studyII_GL.groupby('sona').count()
study2_task_counts = studyII_GL.groupby('subject').count()
study_likert_subject_counts = study_likert.groupby('sona').count()
study_likert_task_counts = study_likert.groupby('subject').count()
study_uniform_subject_counts = study_uniform.groupby('sona').count()
study_uniform_task_counts = study_uniform.groupby('subject').count()
study_tritave_uniform_subject_counts = study_tritave_uniform.groupby('sona').count()
study_tritave_uniform_task_counts = study_tritave_uniform.groupby('subject').count()
study_tritave_pentachords_subject_counts = study_tritave_pentachords.groupby('sona').count()
study_tritave_pentachords_task_counts = study_tritave_pentachords.groupby('subject').count()
print("Post exclusion counts:")
print("Study I: {} subjects, {} tasks".format(study1_subject_counts.shape[0], study1_task_counts.shape[0]))
print("Study II: {} subjects, {} tasks".format(study2_subject_counts.shape[0], study2_task_counts.shape[0]))
print("Likert: {} subjects, {} tasks".format(study_likert_subject_counts.shape[0], study_likert_task_counts.shape[0]))
print("Uniform: {} subjects, {} tasks".format(study_uniform_subject_counts.shape[0], study_uniform_task_counts.shape[0]))
print("Tritave Uniform: {} subjects, {} tasks".format(study_tritave_uniform_subject_counts.shape[0], study_tritave_uniform_task_counts.shape[0]))
print("Tritave Pentachord: {} subjects, {} tasks".format(study_tritave_pentachords_subject_counts.shape[0], study_tritave_pentachords_task_counts.shape[0]))

Post exclusion counts:
Study I: 98 subjects, 98 tasks
Study II: 568 subjects, 630 tasks
Likert: 147 subjects, 145 tasks
Uniform: 47 subjects, 47 tasks
Tritave Uniform: 27 subjects, 27 tasks
Tritave Pentachord: 35 subjects, 35 tasks


## Save data post-exlusion
### Study I

In [43]:
# Save Study I post-exclusion
studyI_GL.to_pickle(StudyI_paths.processed_dir + StudyI_paths.post_exclusion_data_pickle_filename)

### Study II

In [44]:
# Save Study II post-exclusion
studyII_GL.to_pickle(StudyII_paths.processed_dir + StudyII_paths.post_exclusion_data_pickle_filename)

### Likert Experiment

In [45]:
# Save Likert post-exclusion
study_likert.to_pickle(Study_likert_paths.processed_dir + Study_likert_paths.post_exclusion_data_pickle_filename)

### Uniform Sets Experiment


In [46]:
# Save Uniform post-exclusion
study_uniform.to_pickle(Study_uniform_paths.processed_dir + Study_uniform_paths.post_exclusion_data_pickle_filename)

### Tritave Uniform Sets Experiment


In [47]:
# Save Tritave Uniform post-exclusion
study_tritave_uniform.to_pickle(Study_tritave_uniform_paths.processed_dir + Study_tritave_uniform_paths.post_exclusion_data_pickle_filename)

### Tritave Pentachord Sets Experiment


In [48]:
# Save Tritave Pentachord post-exclusion
study_tritave_pentachords.to_pickle(Study_tritave_pentachords_paths.processed_dir + Study_tritave_pentachords_paths.post_exclusion_data_pickle_filename)