<a href="https://colab.research.google.com/github/RumbaughLab/colab/blob/main/RumbaughLab_psyTrack.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# _Implementation of psyTrack for Rumbaugh Lab_
## Figure Generator

relevant to:
1. Cris Creson
2. Sheldon Michaelson
3. Randy Golvin
4. Tom Vaissiere


_(v1.0, last updated January 26, 2022)_

---

This notebook attempts to implement psyTrack on WDIL data, in particular on the WDIL dataset WDIL0007, which is located in a shared folder.

Important note: modifying the inputs should enable the analysis of other WDIL datasets.

Several additional layers can be incorporated, such as:


1.   Stimulus intensities
2.   Number of licks in a specific interval
3.   Peak pupil diameter
4.   etc.

References:


*   [psyTrack](https://github.com/nicholas-roy/psytrack)
*   [paper](<https://www.cell.com/neuron/fulltext/S0896-6273(20)30963-6?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS0896627320309636%3Fshowall%3Dtrue>) and [colab](https://tinyurl.com/PsyTrack-colab)





# Preliminary setup

The libraries listed below will need to be installed.
In the original notebook, the use of oneibl for the first part of the paper was problematic. The libraries and code here focus mostly on the implementation for the Rumbaugh lab data; for comparison with the original colab see [here](https://tinyurl.com/PsyTrack-colab)

In [None]:
import os
import re
from IPython.display import clear_output
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import glob
import copy
import path
import sys

# Install then import PsyTrack
# (the version is pinned to 2.0; the leading "!" runs the command in the
# notebook shell, so this cell only works inside IPython/Colab)
!pip install psytrack==2.0
import psytrack as psy

# Set save path for all figures, decide whether to save permanently
# (the folder is created relative to the current working directory)
SPATH = "Figures/"
!mkdir -p "{SPATH}"

# Set matplotlib defaults for making files consistent in Illustrator
# Short aliases for the colour and z-order tables shipped with psytrack
colors = psy.COLORS
zorder = psy.ZORDER
plt.rcParams['figure.dpi'] = 140
plt.rcParams['savefig.dpi'] = 300
# RGBA white with alpha 0, i.e. saved figures get a transparent background
plt.rcParams['savefig.facecolor'] = (1,1,1,0)
plt.rcParams['savefig.bbox'] = "tight"
plt.rcParams['font.size'] = 10
# plt.rcParams['font.family'] = 'sans-serif'     # not available in Colab
# plt.rcParams['font.sans-serif'] = 'Helvetica'  # not available in Colab
# NOTE(review): 42 should select TrueType font embedding in PDF output
# (see the matplotlib rcParams documentation) -- confirm
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['xtick.labelsize'] = 10
plt.rcParams['ytick.labelsize'] = 10
plt.rcParams['axes.labelsize'] = 12

# Wipe the output of this cell (e.g. the pip install log)
clear_output()

# Custom functions

In [None]:
def formatWDILfile(path):
    '''
    Function to concatenate all the wdil files into one DataFrame
    to be able to run psytrack

    Every .xlsx file found recursively under path is read, except the files
    named settings.xlsx. The subject id and the session date are taken from
    the location of each file, which is expected to look like
    <...>/<sID>/<sessionDate>/<file>.xlsx

    Args:
    path (str): folder with all the files

    Returns:
    pandas.DataFrame: the rows of all the files stacked in sorted file order
        (the original row index of each file is kept), with 3 added columns:
        'sID'         subject id (3rd path component from the end)
        'sessionDate' session folder (2nd path component from the end)
        'session'     0-based counter of the files of a subject, it restarts
                      at 0 every time the subject id changes

    Raises:
    ValueError: if no wdil file is found under path
    '''

    allFiles = set(glob.glob(path+'/**/*.xlsx', recursive=True)) # get all the excel file in folder
    settingFiles = set(glob.glob(path+'/**/settings.xlsx', recursive=True)) # get all the settings file

    # exclude the settings files; important to sort the list to obtain the
    # proper sequence (files of one subject consecutive and in order)
    wdilFiles = sorted(allFiles - settingFiles)

    print('alllFiles: ', len(allFiles), ' wdilFiles: ', len(wdilFiles))
    print(wdilFiles)

    # fail early with an explicit message instead of the opaque
    # "No objects to concatenate" error raised by pd.concat on an empty list
    if not wdilFiles:
        raise ValueError('No WDIL .xlsx files found under ' + repr(path))

    allDat = [] # one DataFrame per file
    previousID = None # subject id of the previously read file
    session = 0

    for fileName in wdilFiles:
        tmp = pd.read_excel(fileName) # read excel file

        pathParts = fileName.split(os.sep)
        sID = pathParts[-3]
        sessionDate = pathParts[-2]

        ## check with previous id and assign the order number of the session
        if sID == previousID:
            session += 1
        else:
            session = 0
        previousID = sID

        tmp['sID'] = sID
        tmp['sessionDate'] = sessionDate
        tmp['session'] = session

        allDat.append(tmp)

    return pd.concat(allDat)

def codingDatFile(allDat):
    '''
    Function to code the concatenated wdil data
    to be able to run psytrack

    Args:
    allDat (pandas.DataFrame): data with at least the columns
        'Lick?', 'Correct?' and 'Trial#'

    Returns:
    pandas.DataFrame: a new DataFrame where 'Trial#' is renamed to 'trial'
        and with the added columns 'choice', 'CorrectCat' and 'hit'

    Note:
    the 'choice' column is also added in place to the DataFrame passed in;
    all the other changes are only present in the returned DataFrame, so the
    result must be assigned: allDat = codingDatFile(allDat)
    '''

    allDat['choice'] = allDat['Lick?']
    # rename creates a new DataFrame, from here on the input is left untouched
    allDat = allDat.rename(columns={'Trial#':'trial'})
    ## establish what are the hit
    ## CorrectCat is the sum of 'choice' and 'Correct?':
    ##      CorrectCat == 2 --> both are 1
    ##      CorrectCat == 0 --> both are 0
    ##      CorrectCat == 1 --> they differ
    ## NOTE(review): presumably 'Correct?' is 1 when a lick is the expected
    ## response, so that 2 is a true hit and 0 a correct rejection -- confirm
    allDat['CorrectCat'] = allDat['choice'] + allDat['Correct?']
    allDat['hit'] = np.where(allDat['CorrectCat'].isin([0, 2]), 1, 0)

    # the coded DataFrame was previously built but never handed back
    return allDat

def getRat(subject, first=20000, cutoff=50):
    '''
    Build the psytrack data dictionary of one subject of RAT_DF

    RAT_DF is a DataFrame which is not defined in this function, it has to
    exist at module level before the call. The columns read from it are
    'subject_id', 'session', 'trial', 's_a', 's_b', 'correct_side',
    'choice' and 'hit'.

    Args:
    subject: value matched against the 'subject_id' column
    first (int): only the first "first" rows of the subject are kept
    cutoff (int): sessions with fewer rows than "cutoff" are removed

    Returns:
    dict with the keys 'subject', 'inputs', 's_a', 's_b', 'correct',
        'answer', 'y' and 'dayLength'; 'inputs' is itself a dict of column
        vectors (one row per trial) with the keys 's_a', 's_b', 's_avg',
        'h' and 'c'
    '''

    def standardize(values):
        # centre on the mean and scale by the standard deviation
        return (values - np.mean(values))/np.std(values)

    def previousTrial(values):
        # shift by one trial (0 for the first one) and set to 0 the trials
        # without a valid previous trial
        return np.hstack(([0], values)) * hasPrior

    # single subject, first rows only, then drop the too short sessions
    ratDat = RAT_DF[RAT_DF['subject_id']==subject][:first]
    ratDat = ratDat.groupby('session').filter(lambda sess: len(sess) >= cutoff)

    # stimuli normalized to standard normal
    toneA = ratDat["s_a"]
    toneB = ratDat["s_b"]
    normA = standardize(toneA)
    normB = standardize(toneB)

    # a trial has a valid previous trial only when its number follows directly
    # the number of the row before (not a mistrial nor a session boundary)
    trialNum = np.array(ratDat["trial"])
    consecutive = (trialNum[1:] - trialNum[:-1]) == 1
    hasPrior = np.hstack(([0], consecutive.astype(int)))

    # previous average tone value, previous correct answer and previous
    # choice; the last two are mapped from (0,1) to (-1,1)
    prevAvg = previousTrial(standardize((toneA[:-1] + toneB[:-1])/2))
    prevAnswer = previousTrial((ratDat["correct_side"][:-1] * 2 - 1).astype(int))
    prevChoice = previousTrial((ratDat["choice"][:-1] * 2 - 1).astype(int))

    inputs = {
        's_a': np.array(normA)[:, None],
        's_b': np.array(normB)[:, None],
        's_avg': np.array(prevAvg)[:, None],
        'h': np.array(prevAnswer)[:, None],
        'c': np.array(prevChoice)[:, None],
    }

    return {
        'subject': subject,
        'inputs': inputs,
        's_a': np.array(ratDat['s_a']),
        's_b': np.array(ratDat['s_b']),
        'correct': np.array(ratDat['hit']),
        'answer': np.array(ratDat['correct_side']),
        'y': np.array(ratDat['choice']),
        'dayLength': np.array(ratDat.groupby(['session']).size()),
    }

def convertToDict(allDat, subject, first=20000, cutoff=50):
    '''
    Build the psytrack data dictionary of one subject of the WDIL data

    equivalent to the function getRat from the paper, see
    https://tinyurl.com/PsyTrack-colab

    Args:
    allDat (pandas.DataFrame): trials with the columns 'sID', 'trial',
        'session', 'Correct?', 'choice', 'Go/NoGo' and 'hit'
    subject: value matched against the 'sID' column
    first (int): only the first "first" rows of the subject are kept
    cutoff (int): not used, the removal of the sessions with fewer than
        "cutoff" trials is disabled in this function

    Returns:
    dict with the keys 'subject', 'inputs', 'stim', 'correct', 'answer',
        'y' and 'dayLength'; 'inputs' is itself a dict of column vectors
        (one row per trial) with the keys 'stim', 'h' and 'c'
    '''

    def previousSigned(column):
        # value of the previous row mapped from (0,1) to (-1,1); 0 for the
        # first row and for the trials without a valid previous trial
        signed = (column[:-1] * 2 - 1).astype(int)
        return np.hstack(([0], signed)) * hasPrior

    # single subject, first rows only
    subjDat = allDat[allDat['sID']==subject][:first]

    # a trial has a valid previous trial only when its number follows directly
    # the number of the row before (not a mistrial nor a session boundary)
    trialNum = np.array(subjDat["trial"])
    consecutive = (trialNum[1:] - trialNum[:-1]) == 1
    hasPrior = np.hstack(([0], consecutive.astype(int)))

    prevAnswer = previousSigned(subjDat["Correct?"]) # previous correct answer
    prevChoice = previousSigned(subjDat["choice"]) # previous choice

    # note here that it could be useful to have different stimulus values
    inputs = {
        'stim': np.array(subjDat['Go/NoGo'])[:, None],
        'h': np.array(prevAnswer)[:, None],
        'c': np.array(prevChoice)[:, None],
    }

    return {
        'subject': subject,
        'inputs': inputs,
        'stim': np.array(subjDat['Go/NoGo']), # correspond to the go/noGo stim
        'correct': np.array(subjDat['hit']), # hit correspond to hit and correct rejection
        'answer': np.array(subjDat['Correct?']), # this is the answer
        'y': np.array(subjDat['choice']), # this correspond to the Lick
        'dayLength': np.array(subjDat.groupby(['session']).size()),
    }

def tpath(mypath, shareDrive = 'Y'):
    '''
    Prefix a path with the root of the shared folder of the current platform

    On linux the root is the gvfs mount point of the share, on any other
    platform it is the drive letter followed by ':'.

    Args:
    mypath (str): path of the file of interest, relative to the shared folder
    shareDrive (str): windows letter of the shared folder (ignored on linux)

    Returns:
    str: root, os.sep and mypath joined together
    '''
    linuxMount = '/run/user/1000/gvfs/smb-share:server=ishtar,share=millerrumbaughlab'
    root = linuxMount if sys.platform == 'linux' else shareDrive+':'

    return os.sep.join((root, mypath))


# WDIL data