<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" type="image/png" height=100 width=100>
</img>


<h1 align="right">KSO Tutorials #12: Analyse Zooniverse classifications</h1>
<h3 align="right">Written by @jannesgg and @vykanton</h3>
<h5 align="right">Last updated: Sept 29th, 2021</h5>

# Set up and requirements

### Import Python packages

In [None]:
# Add the parent directory to the module search path. This has to run before
# the `utils.*` imports below, which are presumably resolved from there.
import sys
sys.path.append('..')

# Set to display dataframes as interactive tables in every output cell
from itables import init_notebook_mode
init_notebook_mode(all_interactive=True)

# Import the project helper modules used throughout this tutorial:
#   t_utils -> project selection
#   t12     -> workflow selection, aggregation and explorer widgets
#   zoo     -> Zooniverse credentials, data retrieval and subject population
import utils.tutorials_utils as t_utils
import utils.t12_utils as t12
import utils.zooniverse_utils as zoo

# Confirmation message, only reached if every import above succeeded
print("Packages loaded successfully")

### Choose your project

In [None]:
# Choose the project to work on. Later cells read the selection through
# `project.value`, so make your choice before running them.
project = t_utils.choose_project()

### Initiate SQL database and populate sites, movies and species

In [None]:
# Specify the path of the movies 
movies_path = "/uploads"

# Specify the path of the sql database
db_path = "koster_lab.db"

# Initiate the SQL database 
%run -i "../db_starter/starter.py" --movies_path $movies_path --db_path $db_path --project_name project.value

### Retrieve Zooniverse information

In [None]:
# Save your Zooniverse user name and password.
# Both values are passed to `zoo.retrieve_zoo_info` in the next cell.
zoo_user, zoo_pass = zoo.zoo_credentials()

In [None]:
# Specify the Zooniverse information required throughout the tutorial
zoo_info = ["subjects", "workflows", "classifications"]

# Retrieve and store the Zooniverse information required throughout the tutorial in a dictionary
project_n, zoo_info_dict = zoo.retrieve_zoo_info(zoo_user, zoo_pass, project.value, zoo_info)

In [None]:
# Populate the sql with subjects uploaded to Zooniverse.
# Uses the "subjects" entry retrieved above, the selected project and the
# database path defined when the SQL database was initiated.
zoo.populate_subjects(zoo_info_dict["subjects"], project.value, db_path)

### Step 1: Specify the Zooniverse workflow id and version of interest

**Note: A manual export in Zooniverse is required to get the most up-to-date classifications here.**

Make sure your workflows in Zooniverse have different names to avoid issues when selecting the workflow id.

In [None]:
# Workflow information retrieved from Zooniverse earlier in the tutorial
workflows_df = zoo_info_dict["workflows"]

# Display a selectable list of workflow names and the type of subject.
# Both returned objects are read through `.value` in the following cells.
workflow_name, subj_type = t12.choose_workflows(workflows_df)

In [None]:
# Look up the id of the workflow whose display name matches the selection.
# Only the first unique id is kept, which is why workflow names need to be
# distinct in Zooniverse.
workflow_id = (
    workflows_df[workflows_df["display_name"] == workflow_name.value]["workflow_id"]
    .unique()[0]
)

# Offer the available versions of that workflow as a selectable list
workflow_version = t12.choose_w_version(workflows_df, workflow_id)

In [None]:
# Retrieve classifications from the workflow of interest.
# Inputs: the workflow id and version selected above, the selected subject
# type, the "classifications" entry retrieved from Zooniverse and the path
# of the SQL database.
class_df = t12.get_classifications(workflow_id,
                                   workflow_version.value,
                                   subj_type.value,
                                   zoo_info_dict["classifications"],
                                   db_path)

### Step 2: Aggregate classifications received on the workflow of interest

In [None]:
# Specify the agreement threshold required among citizen scientists.
# The chosen parameters are passed to the aggregation step in the next cell.
agg_params = t12.choose_agg_parameters(subj_type.value)

In [None]:
# Aggregate the classifications using the parameters chosen above.
# Two dataframes come back: `agg_class_df` feeds the summary, table and
# subject viewer below, `raw_class_df` feeds the per-subject explorer.
# NOTE(review): the helper name is spelled "aggregrate" in this call; it
# presumably matches the definition in utils.t12_utils, so it is left as is.
agg_class_df, raw_class_df = t12.aggregrate_classifications(class_df, subj_type.value, project.value, agg_params)

### Step 3: Summarise the number of classifications based on the agreement specified

In [None]:
# Number of non-null subject ids recorded under each aggregated label
agg_class_df.groupby("label")["subject_ids"].count()

### Step 4: Display the aggregated classifications in a table

In [None]:
# Build the table to display: clips are trimmed to four columns,
# any other subject type shows the full dataframe
a = (
    agg_class_df[["subject_ids", "label", "how_many", "first_seen"]]
    if subj_type.value == "clip"
    else agg_class_df
)

# Leaving the name as the last expression renders the table in the cell output
a

### Step 5: Use the subject explorer widget to visualise subjects and their aggregated classifications

In [None]:
# Launch the subject viewer
t12.launch_viewer(agg_class_df, subj_type.value)

### Step 6: Use the subject explorer widget to get more information about specific subjects and their "raw" classifications

In [None]:
# Launch the classifications_per_subject explorer
t12.explore_classifications_per_subject(raw_class_df, subj_type.value)

In [None]:
# END