# Exploratory Data Analysis of 2018 ACC Lesion Study 

## Imports

In [1]:
import os
import platform

import arviz as az 
import bambi as bmb
import numpy as np
import pandas as pd 
import plotly.express as px 
import polars as pl 
import pymc as pm
from IPython.display import display
from scipy.ndimage import gaussian_filter1d

from matching_pennies.analysis.bambi_plots import plot_trace
from matching_pennies.io.metrics_store import load_metrics
from matching_pennies.utils import preprocessing, plotting 

## Loading Data from Config 

In [2]:
# Which experiment / paradigm to load.
EXPERIMENT = "2018_acc_lesion"
PARADIGM = "post_sx_testing"

# Data root. Set the MATCHING_PENNIES_DATA environment variable to point the
# notebook at a different machine's data directory; when it is unset (or empty)
# the original local path is used, so existing behaviour is unchanged.
DATA_ROOT = (
    os.environ.get("MATCHING_PENNIES_DATA")
    or "C:/Users/benli/Documents/code/matching-pennies/data"
)

# load_metrics returns three objects, unpacked here.
# NOTE(review): presumably a trial-level frame, a session-level frame, and a
# manifest -- confirm against matching_pennies.io.metrics_store.
tdf_post, sdf_post, manifest = load_metrics(EXPERIMENT, PARADIGM, root=DATA_ROOT)

## Cursory Data Exploration

Answer some basic questions: 
1. How many animals are there in total?
2. How many trials did each animal complete in each session?
3. Does each animal have the same number of sessions? 

### Animals

How many animals are there total? 

In [None]:
# Number of distinct animal IDs in the session table.
n = sdf_post["animal_id"].n_unique()
# Bare expression on the last line so the cell actually displays the count
# (an assignment alone produces no output).
n

18

How many animals in each treatment group? 

In [54]:
# For each treatment label: the distinct animal IDs and how many there are.
group_ids = sdf_post.group_by("treatment").agg(
    pl.col("animal_id").unique(),
    n_animals=pl.col("animal_id").n_unique(),
)
group_ids

treatment,animal_id,n_animals
cat,list[cat],u32
,"[""P4026""]",1
"""sham""","[""P4023"", ""P4024"", … ""P4037""]",7
"""acc""","[""P4020"", ""P4021"", … ""P4036""]",10


### Treatment

One animal (P4026) does not have a treatment group assigned. Remove it from both the session-level and trial-level data.

In [71]:
# Rows of group_ids whose treatment label is missing.
null_group = group_ids.filter(pl.col("treatment").is_null())
# Fail loudly if the data no longer contains exactly one unlabeled group,
# instead of raising an opaque IndexError on the indexing below.
assert null_group.height == 1, (
    f"expected exactly one null-treatment group, found {null_group.height}"
)

# The animal IDs belonging to that unlabeled group.
null_ids = null_group.get_column("animal_id")[0]
# The downstream filter removes a single animal ID, so make sure no second
# unlabeled animal would be silently left in the dataset.
assert len(null_ids) == 1, (
    f"expected exactly one animal without a treatment, found {null_ids.to_list()}"
)

null_treatment = null_ids[0]
null_treatment

'P4026'

In [73]:
# Drop every row belonging to the untreated animal from the session-level and
# trial-level frames in one pass.
sdf_post, tdf_post = (
    frame.filter(pl.col("animal_id").ne(null_treatment))
    for frame in (sdf_post, tdf_post)
)

# Sanity check: should display False once the animal has been removed.
null_treatment in sdf_post.get_column("animal_id").unique()

False

Set treatment order to use in plotting later

In [78]:
# Display order for the treatment groups, and the mapping that passes that
# order on for the "treatment" column.
treat_order = ["sham", "acc"]
cat = dict(treatment=treat_order)

### Number of sessions

In [74]:
# Highest session index reached by each animal, used here as its session count.
# NOTE(review): this equals the number of sessions only if session_idx starts
# at 1 and has no gaps -- confirm.
num_sessions = sdf_post.group_by("animal_id").agg(
    num_sessions=pl.col("session_idx").max()
)
# Distinct per-animal values; a single entry means every animal has the same one.
ses_set = num_sessions.get_column("num_sessions").unique()
ses_set

num_sessions
i32
3


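`ses_set` contains a single value, 3, so every animal's highest `session_idx` is 3; assuming sessions are numbered from 1 without gaps, all animals completed 3 sessions.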

### Trial Numbers

Next, let's look at the minimum, maximum, mean, median, and standard deviation of the number of trials per session in each treatment group.

In [75]:
# Summary statistics of NumTrials across sessions, one row per treatment group.
sdf_post.group_by("treatment").agg(
    minimum_trials=pl.col("NumTrials").min(),
    maximum_trials=pl.col("NumTrials").max(),
    mean_trials=pl.col("NumTrials").mean(),
    median_trials=pl.col("NumTrials").median(),
    sd_trials=pl.col("NumTrials").std(),
)

treatment,minimum_trials,maximum_trials,mean_trials,median_trials,sd_trials
cat,u32,u32,f64,f64,f64
"""acc""",245,453,341.4,338.0,53.827054
"""sham""",199,435,324.666667,329.0,65.232916


Plot a histogram for the number of trials in each session

In [83]:
# Overlaid, probability-normalised histograms of NumTrials, one per treatment
# group, drawn in the order given by `cat`. The title and readable labels let
# the figure stand on its own when the notebook is skimmed.
fig = px.histogram(
    sdf_post,
    x="NumTrials",
    color="treatment",
    category_orders=cat,
    barmode="overlay",
    histnorm="probability",
    title="Trials per session by treatment group",
    labels={"NumTrials": "Trials per session", "treatment": "Treatment"},
)
fig.show()