# Imports




In [None]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

# Pull IBL data from github
Will also filter out untrained mice

In [None]:
# install IBL pipeline package to access and navigate the pipeline
!pip install --quiet nma-ibl

# Download data needed for plot recreation
!wget https://github.com/vathes/nma-ibl/raw/master/uuids_trained1.npy

[K     |████████████████████████████████| 101 kB 5.8 MB/s 
[K     |████████████████████████████████| 4.1 MB 37.3 MB/s 
[K     |████████████████████████████████| 76 kB 1.1 MB/s 
[K     |████████████████████████████████| 43 kB 1.4 MB/s 
[K     |████████████████████████████████| 78 kB 6.5 MB/s 
[?25h--2022-07-26 22:17:52--  https://github.com/vathes/nma-ibl/raw/master/uuids_trained1.npy
Resolving github.com (github.com)... 140.82.121.3
Connecting to github.com (github.com)|140.82.121.3|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://github.com/datajoint-company/nma-ibl/raw/master/uuids_trained1.npy [following]
--2022-07-26 22:17:53--  https://github.com/datajoint-company/nma-ibl/raw/master/uuids_trained1.npy
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/datajoint-company/nma-ibl/master/uuids_trained1.npy [following]
--2022-07-26 22:17:53--

In [None]:
import datajoint as dj
dj.config['database.host'] = 'datajoint-public.internationalbrainlab.org'
dj.config['database.user'] = 'ibl-public'
dj.config['database.password'] = 'ibl-public'
dj.conn() # explicitly verify that the connection to database can be established

[2022-07-26 22:17:54,436][INFO]: Connecting ibl-public@datajoint-public.internationalbrainlab.org:3306
[2022-07-26 22:17:55,297][INFO]: Connected ibl-public@datajoint-public.internationalbrainlab.org:3306


DataJoint connection (connected) ibl-public@datajoint-public.internationalbrainlab.org:3306

In [None]:
from nma_ibl.paper_behavior_functions import (query_subjects, seaborn_style,
                                              group_colors, institution_map, seaborn_style)

from nma_ibl import behavior, subject, behavior_analyses  # this is all we are using for now

Gather the information to find trained mice. This is stored in trained_mouse_trials

In [None]:
uuids = np.load('uuids_trained1.npy', allow_pickle=True)
subjects = subject.Subject & [{'subject_uuid': uuid} for uuid in uuids]
use_subjects = query_subjects()
behavior_analyses.BehavioralSummaryByDate()
b = behavior_analyses.BehavioralSummaryByDate * use_subjects
trained_mouse_trials = behavior.TrialSet.Trial * use_subjects

In [None]:
#lab_names_column = trained_mouse_trials.fetch('lab_name')

# Build a pandas dataframe from ALL the trained_mouse_trials infromation

In [None]:
data = trained_mouse_trials.fetch() # took approx 6min

In [None]:
pd_data_all = pd.DataFrame(data)  # took approx 6sec

# Add in required columns, and pop the useless ones
- Response Duration 
- Stimulus Delay
- Stimulus Location
- Stimulus Contrast
- Phase of Session

In [None]:
pd_data_all.pop('trial_response_choice')
pd_data_all.pop('trial_reward_volume')
pd_data_all.pop('trial_included')
pd_data_all.pop('subject_project')
pd_data_all.pop('trial_rep_num')
pd_data_all.pop('trial_stim_prob_left')
pd_data_all.pop('trial_go_cue_time')
pd_data_all.pop('trial_go_cue_trigger_time')

In [None]:
response_duration = pd_data_all['trial_response_time'] - pd_data_all['trial_stim_on_time']
pd_data_all.insert(3, 'response_duration',response_duration)
#response_duration

stimulus_delay = pd_data_all['trial_stim_on_time'] - pd_data_all['trial_start_time']
pd_data_all.insert(4, 'stimulus_delay',stimulus_delay)
#stimulus_delay

In [None]:
# Now, we need to make the columns Stimulus Location and Stimulus Contrast

stimulus_location = np.zeros(4047617, dtype=int)
stimulus_contrast = np.zeros(4047617, dtype=float)

# cycle through trial_stim_contrast_left (and refer to trial_stim_contrast_right)
# in order to fill in our arrays

for index in range(4047617):
  element = pd_data_all['trial_stim_contrast_left'][index]  # get the value of contrast left
  
  if (element == 0):  # if the stim_cont_left is 0 that implies the stimulus was on the right
    stimulus_location[index] = 2  # 2 IS RIGHT
    stimulus_contrast[index] = pd_data_all['trial_stim_contrast_right'][index]
  
  else :  # if the stim_cont_left is NOT 0 that implies the stimulus was on the left
    stimulus_location[index] = 1  # 1 IS LEFT
    stimulus_contrast[index] = element


In [None]:
pd_data_all.insert(6, 'stimulus_location',stimulus_location)
pd_data_all.insert(6, 'stimulus_contrast',stimulus_contrast)

In [None]:
pd_data_all['session_phase'] = pd_data_all.groupby(['subject_nickname', 'session_start_time'])['trial_id'].transform(
    lambda x: pd.qcut(x, 3, labels=[1, 2, 3]))

session_phase = pd_data_all.pop('session_phase')
pd_data_all.insert(6, 'session_phase',session_phase)

In [None]:
pd_data_all['normalized_stimulus_delay'] = pd_data_all.groupby(['subject_nickname'])['stimulus_delay'].transform(
    lambda x: (x - np.nanmean(x)) / (np.nanstd(x)) )


In [None]:
pd_data_all['trial_feedback_type'] = pd_data_all['trial_feedback_type'].shift(1) #shift downwards
pd_data_all = pd_data_all.drop(0) # delete the first row

In [None]:
pd_data_all.pop('subject_uuid')
pd_data_all.pop('trial_start_time')
pd_data_all.pop('trial_end_time')
pd_data_all.pop('trial_response_time')
pd_data_all.pop('trial_stim_on_time')
pd_data_all.pop('trial_stim_contrast_left')
pd_data_all.pop('trial_stim_contrast_right')
pd_data_all.pop('trial_feedback_time')
pd_data_all.pop('trial_iti_duration')
pd_data_all.pop('institution_short')
pd_data_all.pop('date_trained')

In [None]:
subject_nickname = pd_data_all.pop('subject_nickname')
pd_data_all.insert(0, 'subject_nickname',subject_nickname)

In [None]:
lab_name = pd_data_all.pop('lab_name')
pd_data_all.insert(10, 'lab_name',lab_name)

In [None]:
pd_data_all['normalized_response_duration'] = pd_data_all.groupby(['subject_nickname'])['response_duration'].transform(
    lambda x: (x - np.nanmean(x)) / (np.nanstd(x)) )

In [None]:
normalized_response_duration = pd_data_all.pop('normalized_response_duration')
pd_data_all.insert(3, 'normalized_response_duration',normalized_response_duration)

normalized_stimulus_delay = pd_data_all.pop('normalized_stimulus_delay')
pd_data_all.insert(4, 'normalized_stimulus_delay',normalized_stimulus_delay)

In [None]:
pd_data_all

Unnamed: 0,subject_nickname,session_start_time,trial_id,normalized_response_duration,normalized_stimulus_delay,response_duration,stimulus_delay,session_phase,stimulus_contrast,stimulus_location,trial_feedback_type,lab_name
1,KS019,2019-08-10 11:24:59,2,1.125428,-0.260417,6.2972,0.9265,1,1.0000,1,-1,cortexlab
2,KS019,2019-08-10 11:24:59,3,3.965963,-0.506389,18.2094,0.6554,1,1.0000,1,-1,cortexlab
3,KS019,2019-08-10 11:24:59,4,0.096586,1.184018,1.9826,2.5185,1,1.0000,1,-1,cortexlab
4,KS019,2019-08-10 11:24:59,5,1.490599,-0.456940,7.8286,0.7099,1,1.0000,2,-1,cortexlab
5,KS019,2019-08-10 11:24:59,6,1.335031,-0.389346,7.1762,0.7844,1,1.0000,1,1,cortexlab
...,...,...,...,...,...,...,...,...,...,...,...,...
4047612,DY_009,2020-03-13 16:04:11,818,-0.402557,1.710189,0.4818,1.9877,3,1.0000,2,-1,danlab
4047613,DY_009,2020-03-13 16:04:11,819,-0.438394,-0.329457,0.1813,0.6093,3,1.0000,1,1,danlab
4047614,DY_009,2020-03-13 16:04:11,820,-0.414435,-0.457305,0.3822,0.5229,3,0.1250,2,1,danlab
4047615,DY_009,2020-03-13 16:04:11,821,-0.375808,-0.312588,0.7061,0.6207,3,0.0000,2,1,danlab


In [None]:
#df = pd_data_all
#df[df["lab_name"] == 'churchlandlab'] 

In [None]:
pd_data_all = pd_data_all.dropna()

In [None]:
#remove unnormalized columns
pd_data_all.pop('response_duration')
pd_data_all.pop('stimulus_delay')

1          0.9265
2          0.6554
3          2.5185
4          0.7099
5          0.7844
            ...  
4047612    1.9877
4047613    0.6093
4047614    0.5229
4047615    0.6207
4047616    1.6319
Name: stimulus_delay, Length: 3862115, dtype: float64

In [None]:
pd_data_all

Unnamed: 0,subject_nickname,session_start_time,trial_id,normalized_response_duration,normalized_stimulus_delay,session_phase,stimulus_contrast,stimulus_location,trial_feedback_type,lab_name
1,KS019,2019-08-10 11:24:59,2,1.125428,-0.260417,1,1.0000,1,-1,cortexlab
2,KS019,2019-08-10 11:24:59,3,3.965963,-0.506389,1,1.0000,1,-1,cortexlab
3,KS019,2019-08-10 11:24:59,4,0.096586,1.184018,1,1.0000,1,-1,cortexlab
4,KS019,2019-08-10 11:24:59,5,1.490599,-0.456940,1,1.0000,2,-1,cortexlab
5,KS019,2019-08-10 11:24:59,6,1.335031,-0.389346,1,1.0000,1,1,cortexlab
...,...,...,...,...,...,...,...,...,...,...
4047612,DY_009,2020-03-13 16:04:11,818,-0.402557,1.710189,3,1.0000,2,-1,danlab
4047613,DY_009,2020-03-13 16:04:11,819,-0.438394,-0.329457,3,1.0000,1,1,danlab
4047614,DY_009,2020-03-13 16:04:11,820,-0.414435,-0.457305,3,0.1250,2,1,danlab
4047615,DY_009,2020-03-13 16:04:11,821,-0.375808,-0.312588,3,0.0000,2,1,danlab


In [None]:
#df = pd_data_all
#df[df["lab_name"] == 'churchlandlab'] 

In [None]:
pd_data_all.to_csv("/content/drive/MyDrive/Neuromatch_Project_Folder/mouse_data_cleaned_normalized.csv", index = False)

In [2]:
import pandas as pd

In [4]:
df = pd.read_csv('/content/drive/MyDrive/Neuromatch_Project_Folder/mouse_data_cleaned_normalized.csv')

Unnamed: 0,subject_nickname,session_start_time,trial_id,normalized_response_duration,normalized_stimulus_delay,session_phase,stimulus_contrast,stimulus_location,trial_feedback_type,lab_name
0,KS019,2019-08-10 11:24:59,2,1.125428,-0.260417,1,1.0000,1,-1,cortexlab
1,KS019,2019-08-10 11:24:59,3,3.965963,-0.506389,1,1.0000,1,-1,cortexlab
2,KS019,2019-08-10 11:24:59,4,0.096586,1.184018,1,1.0000,1,-1,cortexlab
3,KS019,2019-08-10 11:24:59,5,1.490599,-0.456940,1,1.0000,2,-1,cortexlab
4,KS019,2019-08-10 11:24:59,6,1.335031,-0.389346,1,1.0000,1,1,cortexlab
...,...,...,...,...,...,...,...,...,...,...
3862110,DY_009,2020-03-13 16:04:11,818,-0.402557,1.710189,3,1.0000,2,-1,danlab
3862111,DY_009,2020-03-13 16:04:11,819,-0.438394,-0.329457,3,1.0000,1,1,danlab
3862112,DY_009,2020-03-13 16:04:11,820,-0.414435,-0.457305,3,0.1250,2,1,danlab
3862113,DY_009,2020-03-13 16:04:11,821,-0.375808,-0.312588,3,0.0000,2,1,danlab


In [19]:
lab_names_unique = df['lab_name'].unique()
lab_names_unique

array(['cortexlab', 'mrsicflogellab', 'wittenlab', 'mainenlab',
       'zadorlab', 'churchlandlab', 'danlab', 'angelakilab', 'hoferlab'],
      dtype=object)

In [56]:
groups = df.groupby(['lab_name'], as_index = False)

In [61]:
groups.get_group('cortexlab')['stimulus_contrast'].unique()

array([1.    , 0.5   , 0.25  , 0.125 , 0.0625, 0.    ])

In [63]:
names = ['cortexlab', 'mrsicflogellab', 'wittenlab', 'mainenlab',
       'zadorlab', 'churchlandlab', 'danlab', 'angelakilab', 'hoferlab']

In [70]:
for name in names:
  u = groups.get_group(name)['stimulus_contrast'].unique()
  u.sort()
  print(str(name) + ":" + str(u))

cortexlab:[0.     0.0625 0.125  0.25   0.5    1.    ]
mrsicflogellab:[0.     0.0625 0.125  0.25   0.5    1.    ]
wittenlab:[0.     0.0625 0.125  0.25   0.5    1.    ]
mainenlab:[0.     0.04   0.0625 0.08   0.125  0.25   0.5    1.    ]
zadorlab:[0.     0.0625 0.125  0.25   0.5    1.    ]
churchlandlab:[0.     0.0625 0.125  0.25   0.5    1.    ]
danlab:[0.     0.0625 0.125  0.25   0.5    1.    ]
angelakilab:[0.     0.0625 0.125  0.25   0.5    1.    ]
hoferlab:[0.     0.0625 0.125  0.25   0.5    1.    ]


In [None]:
# Read the csv
#pd.read_csv("All_mouse_data.csv")

In [None]:
#pd_data_all = pd.DataFrame(data)
#pd_data_all.to_csv("/content/drive/MyDrive/Neuromatch_Project_Folder/All_mouse_data.csv", index = False)

In [None]:
#cshl_trials_all_dj = trained_mouse_trials & 'lab_name = "churchlandlab"'
#cshl_trials_all_dj