In [24]:
import pandas as pd
import numpy as np
import seaborn as sns
import polars as pl
import matplotlib.pyplot as plt
from matplotlib import pyplot

In [25]:
from sklearn.preprocessing import MinMaxScaler

In [26]:
'''
function to check that tail_base was also inside arm for a list of nose frames
'''
def check_tail_entry(df_mouse_boolean, list_of_frames, ti):
    '''
    Decide whether the tail base was also flagged during a run of nose frames.

    Walks ``list_of_frames`` in order and counts the frames where column
    ``ti`` of ``df_mouse_boolean`` equals 1. The count is cumulative: frames
    where the flag is not 1 do not reset it.

    Parameters
    ----------
    df_mouse_boolean : pandas.DataFrame
        Per-frame table, read positionally through ``.iloc``.
    list_of_frames : iterable of int
        Row positions to inspect.
    ti : int
        Column position of the tail_base flag to test.

    Returns
    -------
    tuple
        ``(True, frame - 5)`` where ``frame`` is the sixth counted frame, or
        ``(False, last)`` where ``last`` is the last counted frame
        (0 when no frame was counted).
    '''
    hits = 0
    last_hit = 0  # most recent frame whose tail flag was 1
    for frame in list_of_frames:
        if df_mouse_boolean.iloc[frame, ti] != 1:
            continue
        last_hit = frame
        hits += 1
        if hits > 5:
            # Sixth hit reached: report the frame five positions earlier.
            return True, last_hit - 5
    return False, last_hit

In [27]:
'''
function to check when tail_base exited
'''
def check_tail_exit(df_mouse_boolean, startf, ti):
    '''
    Find the frame at which the tail base flag turns off for good.

    Scans rows ``startf`` .. ``len(df_mouse_boolean) - 1`` of column ``ti``
    looking for a run of more than five consecutive frames equal to 0.

    Parameters
    ----------
    df_mouse_boolean : pandas.DataFrame
        Per-frame table, read positionally through ``.iloc``.
    startf : int
        First row position to inspect.
    ti : int
        Column position of the tail_base flag to test.

    Returns
    -------
    int
        The first frame of the first run of six zeros. If no such run exists
        before the table ends, the last frame seen with value 0 is returned,
        or 0 when no inspected frame was 0.
    '''
    exit_frame = 0  # last frame seen with the flag at 0
    zero_run = 0    # length of the current run of consecutive zeros
    for frame in range(startf, len(df_mouse_boolean)):
        if df_mouse_boolean.iloc[frame, ti] == 0:
            exit_frame = frame
            zero_run += 1
            if zero_run > 5:
                # Six zeros in a row: the run started five frames back.
                return frame - 5
        else:
            zero_run = 0
    return exit_frame

In [28]:
'''
function to exclude repeat entries (when for a new nose entry, tail base is already inside arm)
'''
def check_repeat_entries(df_entries_stats_sorted):
    '''
    Keep only the first row for each distinct 'Tail Exit Frame' value.

    Rows are kept in their incoming order and with their incoming index
    labels; the input frame is not modified.
    '''
    is_repeat = df_entries_stats_sorted.duplicated(subset=['Tail Exit Frame'], keep='first')
    return df_entries_stats_sorted[~is_repeat]

In [29]:
'''
function to get the max distance from center roi for a list of frames
'''
def get_max_dist(df_mouse_dist_roi, stf, enf, fi):
    '''
    Return the largest value of column ``fi`` over rows ``stf`` .. ``enf``.

    Parameters
    ----------
    df_mouse_dist_roi : pandas.DataFrame
        Per-frame table, read positionally through ``.iloc``.
    stf, enf : int
        First and last row positions to inspect (both inclusive).
    fi : int
        Column position of the value to maximise.

    Returns
    -------
    tuple
        ``(value, frame)``. The search starts from a floor of 0, so
        ``(0, 0)`` is returned when the range is empty or every value is
        below 0. When several frames tie, the latest one is reported.
    '''
    best_value, best_frame = 0, 0
    for frame in range(stf, enf + 1):
        distance = df_mouse_dist_roi.iloc[frame, fi]
        if distance >= best_value:  # '>=' so later ties replace earlier ones
            best_value, best_frame = distance, frame
    return best_value, best_frame

In [30]:
'''
main function will iterate through all frames for each nose inside arm X - from mouse_boolean.csv
then call function to validate if entry is valid (tail has also entered for more than 5 frames) - returns True
then once True, calculate entry latency - time for tail base to fully enter after a successful nose entry
then from the moment the tail enters calculate duration - until the tail leaves
calculate movement of nose and tail during the duration
then calculate exit latency - time for tail base to fully exit after a successful nose exit

That will produce table as follows:
    entry frame    exit frame    duration   nose mov nose velocity  tail mov  tail velocity   entry latency   exit latency  nose entry frame  nose exit frame (both for later directionality purposes)  
A
A
A
B
B
C
C
D
D
E
E
E
'''
def valid_entries_stats(df_mouse_boolean, df_mouse_dist_roi, mouse_id, fps=30):
    '''
    Build the table of valid arm visits for arms A-E and save it to CSV.

    For every nose-in-arm column (from the polygon A column to the polygon E
    column, assumed contiguous) the frames are scanned for runs where the
    nose flag is 1. Each run is validated with ``check_tail_entry`` on the
    matching tail_base column; valid runs produce one output row.

    Parameters
    ----------
    df_mouse_boolean : pandas.DataFrame
        Per-frame flags. Must contain the 'Animal_1 nose inside polygon A/E
        (Boolean)' and 'Animal_1 tail_base inside polygon A (Boolean)'
        columns. The tail_base column of an arm is located at a fixed offset
        from that arm's nose column.
    df_mouse_dist_roi : pandas.DataFrame
        Per-frame distances. Must contain the 'Animal_1 nose to Middle center
        distance (mm)' and 'Animal_1 tail_base to Middle center distance
        (mm)' columns.
    mouse_id : str
        Prefix for the two CSV files written:
        ``<mouse_id>_new_entry_stats.csv`` and
        ``<mouse_id>_new_entry_stats_sorted.csv``.
    fps : int or float, optional
        Frames per second used to turn frame counts into seconds. Defaults to
        30, the value that used to be hard-coded.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(all_visits, sorted_visits)``. ``sorted_visits`` is ordered by
        'Tail Entry Frame', filtered by ``check_repeat_entries`` and
        re-indexed from 0.
    '''
    arms = ['A', 'B', 'C', 'D', 'E']

    ina = df_mouse_boolean.columns.get_loc('Animal_1 nose inside polygon A (Boolean)')
    ine = df_mouse_boolean.columns.get_loc('Animal_1 nose inside polygon E (Boolean)')
    ita = df_mouse_boolean.columns.get_loc('Animal_1 tail_base inside polygon A (Boolean)')
    dif = ita - ina  # offset from an arm's nose column to its tail_base column

    # Column positions do not change per visit, so look them up once.
    ft = df_mouse_dist_roi.columns.get_loc('Animal_1 tail_base to Middle center distance (mm)')
    fn = df_mouse_dist_roi.columns.get_loc('Animal_1 nose to Middle center distance (mm)')

    columns = ['Arm Entry', 'Tail Entry Frame', 'Tail Exit Frame', 'Duration',
               'Entry Latency', 'Exit Latency', 'Nose Max Distance',
               'Nose Max Dist Frame', 'Nose Time to Max Dist',
               'Tail Base Max Distance', 'Tail Base Max Dist Frame',
               'Tail Base Time to Max Dist', 'Nose Entry Frame',
               'Nose Exit Frame']
    stats = {name: [] for name in columns}

    def clamp(seconds):
        # Latencies are reported as 0 when the computed value is not positive.
        return seconds if seconds > 0 else 0

    n_frames = len(df_mouse_boolean)
    for nose_col in range(ina, ine + 1):
        tail_col = nose_col + dif
        visit = []  # frames of the nose run currently being collected
        # NOTE(review): the final frame is never inspected and a run still
        # open at that point is not flushed; kept as in the original.
        for frame in range(0, n_frames - 1):
            if df_mouse_boolean.iloc[frame, nose_col] == 1:
                visit.append(frame)
                continue
            if not visit:
                continue

            # The nose run just ended: take it and reset for the next one.
            nose_frames, visit = visit, []
            valid, tail_entry = check_tail_entry(df_mouse_boolean, nose_frames, tail_col)
            if not valid:
                continue

            nose_in, nose_out = nose_frames[0], nose_frames[-1]
            tail_exit = check_tail_exit(df_mouse_boolean, nose_out, tail_col)
            tail_max, tail_max_frame = get_max_dist(df_mouse_dist_roi, tail_entry, tail_exit, ft)
            nose_max, nose_max_frame = get_max_dist(df_mouse_dist_roi, nose_in, nose_out, fn)

            row = {
                'Arm Entry': arms[nose_col - ina],
                'Tail Entry Frame': tail_entry,
                'Tail Exit Frame': tail_exit,
                # Duration runs from tail entry to nose exit, in seconds.
                'Duration': (nose_out - tail_entry) / fps,
                'Entry Latency': clamp((tail_entry - nose_in) / fps),
                'Exit Latency': clamp((tail_exit - nose_out) / fps),
                'Nose Max Distance': nose_max,
                'Nose Max Dist Frame': nose_max_frame,
                'Nose Time to Max Dist': (nose_max_frame - nose_in) / fps,
                'Tail Base Max Distance': tail_max,
                'Tail Base Max Dist Frame': tail_max_frame,
                'Tail Base Time to Max Dist': (tail_max_frame - tail_entry) / fps,
                'Nose Entry Frame': nose_in,
                'Nose Exit Frame': nose_out,
            }
            for name, value in row.items():
                stats[name].append(value)

    df_valid_entries_stats = pd.DataFrame(stats)
    df_valid_entries_stats.to_csv(mouse_id+'_new_entry_stats.csv')

    df_valid_entries_stats_s = df_valid_entries_stats.sort_values(by='Tail Entry Frame')
    df_valid_entries_stats_sorted = check_repeat_entries(df_valid_entries_stats_s)
    df_valid_entries_stats_sorted = df_valid_entries_stats_sorted.reset_index(drop=True)
    df_valid_entries_stats_sorted.to_csv(mouse_id+'_new_entry_stats_sorted.csv')
    return df_valid_entries_stats, df_valid_entries_stats_sorted

In [31]:
'''
function just for the movement stats, 
entry frame and exit frame used for entry nose mov and vel and entry tail mov and vel
'''
def get_mov_stats(df_mouse_features, entry, exit, feature_i):
    '''
    Add up column ``feature_i`` over rows ``entry`` .. ``exit`` (inclusive).

    Rows are selected positionally with ``.iloc`` slicing, so an empty or
    reversed range gives 0. Values are accumulated one by one, left to
    right, starting from 0.
    '''
    window = df_mouse_features.iloc[entry:exit + 1, feature_i]
    running_total = 0
    for step in window:
        running_total += step
    return running_total

In [32]:
'''
function for tortuosity calculations
get nose entry, max_dist and exit frame
'entry' tortuosity as nose_mov/euclidean dist from nose entry coor to max_dist coor
'exit' tortuosity as nose_mov/euclidean dist from max_dist coor to nose exit coor

the euclidean dist is from the pythagorean theorem for the pair of coordinate points
'''
def get_tortuosity(df_mouse_features, frame1, frame2, i, pixel):
    '''
    Ratio of travelled path length to straight-line distance between frames.

    Parameters
    ----------
    df_mouse_features : pandas.DataFrame
        Must contain 'Nose_x' and 'Nose_y'; these are read by index label.
    frame1, frame2 : int
        Start and end frames.
    i : int
        Column position of the per-frame movement passed to
        ``get_mov_stats``.
    pixel : number
        Divisor applied to the coordinate distance.
        # presumably pixels per mm - TODO confirm against the caller

    Returns
    -------
    tuple
        ``(tortuosity, path_length, straight_distance)``.
    '''
    nose_x = df_mouse_features['Nose_x']
    nose_y = df_mouse_features['Nose_y']
    start_point = np.array((nose_x[frame1], nose_y[frame1]))
    end_point = np.array((nose_x[frame2], nose_y[frame2]))

    path_length = get_mov_stats(df_mouse_features, frame1, frame2, i)
    straight_distance = np.linalg.norm(end_point - start_point) / pixel
    return path_length / straight_distance, path_length, straight_distance

In [33]:
'''
function to get frames in between and give back a list of coefficients for each arm
'''
def get_coeff(df_features, start, end):
    '''
    Share of "arm flag" hits per arm over frames ``start`` .. ``end``.

    The last five columns of ``df_features`` are read, in order, as the
    flags for arms A, B, C, D and E. Each truthy flag adds one to that arm's
    count, and a frame where all five flags are falsy adds one to a "none"
    count. Each coefficient is the arm's count divided by the sum of all six
    counts.

    Parameters
    ----------
    df_features : pandas.DataFrame
        Per-frame table, read positionally through ``.iloc``.
    start, end : int
        First and last row positions (both inclusive).

    Returns
    -------
    tuple of float
        ``(a, b, c, d, e)``. When the range is empty (``start > end``) all
        five coefficients are 0.0 instead of raising ``ZeroDivisionError``.
    '''
    arm_counts = [0, 0, 0, 0, 0]
    none_count = 0
    for frame in range(start, end + 1):
        in_any_arm = False
        for slot, column in enumerate(range(-5, 0)):
            if df_features.iloc[frame, column]:
                arm_counts[slot] += 1
                in_any_arm = True
        if not in_any_arm:
            none_count += 1

    total = sum(arm_counts) + none_count
    if total == 0:
        # No frame was inspected, so there is nothing to normalise.
        return 0.0, 0.0, 0.0, 0.0, 0.0
    return tuple(count / total for count in arm_counts)

In [34]:
'''
this function will iterate through all entries in order of nose exit frame
call function to calculate time in the middle from nose exit to the next nose entry as duration in middle
calculate movement and velocity for that as well
and then call function to calculate directionality coefficients (from features csv) for those frames and return so that table is ordered as:

    time duration in middle before entry   mov middle velocity middle direct coeff A   direct coeff B   direct coeff C   direct coeff D   direct coeff E
A
B
D
E
C
A
D
B
E
'''
def all_other_stats(df_entries_sorted, df_mouse_features, mouse_id, pixel, fps=30):
    '''
    Compute movement, velocity, tortuosity and directionality per arm visit.

    One output row is produced per row of ``df_entries_sorted``. Columns that
    describe the transition from the previous visit (entry-latency movement,
    all 'Middle ...' columns and the directionality coefficients) hold a 0
    placeholder for the first visit because it has no predecessor.

    Parameters
    ----------
    df_entries_sorted : pandas.DataFrame
        Visit table with 'Arm Entry', 'Nose Entry Frame', 'Nose Exit Frame',
        'Tail Entry Frame', 'Tail Exit Frame', 'Nose Max Dist Frame',
        'Duration', 'Entry Latency' and 'Exit Latency'. Rows are addressed by
        label 0..n-1, so the index is expected to be the default one.
    df_mouse_features : pandas.DataFrame
        Per-frame features with 'Movement_mouse_nose' and
        'Movement_mouse_tail_base'; also passed to ``get_tortuosity`` and
        ``get_coeff``.
    mouse_id : str
        Prefix of the CSV written: ``<mouse_id>_newest_other_stats.csv``.
    pixel : number
        Forwarded unchanged to ``get_tortuosity``.
    fps : int or float, optional
        Frames per second used for the middle duration. Defaults to 30, the
        value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        The statistics table, indexed from 0.
    '''
    def nonneg(value):
        # Anything that is not strictly positive (including NaN) becomes 0.
        return value if value > 0 else 0

    def rate(amount, seconds):
        # Guarded division: 0 when the time span is zero.
        return (amount / seconds or 0) if seconds else 0

    visit_columns = [
        'Arm Nose Movement', 'Arm Nose Velocity',
        'Arm Tail Movement', 'Arm Tail Velocity',
        'Entry Tortuosity', 'L Entry Tortuosity', 'D Entry Tortuosity',
        'Exit Tortuosity', 'L Exit Tortuosity', 'D Exit Tortuosity',
        'Other Tortuosity', 'L Other Tortuosity', 'D Other Tortuosity',
        'Entry Latency Nose Movement', 'Entry Latency Nose Velocity',
        'Entry Latency Tail Movement', 'Entry Latency Tail Velocity',
        'Middle Duration',
        'Middle Nose Movement', 'Middle Nose Velocity',
        'Middle Tail Movement', 'Middle Tail Velocity',
        'Exit Latency Nose Movement', 'Exit Latency Nose Velocity',
        'Exit Latency Tail Movement', 'Exit Latency Tail Velocity',
        'Middle Tortuosity', 'L Middle Tortuosity', 'D Middle Tortuosity',
        'Directionality Coefficiet A', 'Directionality Coefficiet B',
        'Directionality Coefficiet C', 'Directionality Coefficiet D',
        'Directionality Coefficiet E',
        'New Directionality Coefficiet A', 'New Directionality Coefficiet B',
        'New Directionality Coefficiet C', 'New Directionality Coefficiet D',
        'New Directionality Coefficiet E',
    ]
    # Columns only filled from the second visit on; they start with a 0.
    transition_prefixes = ('Entry Latency ', 'Middle ', 'L Middle ', 'D Middle ',
                           'Directionality ', 'New Directionality ')

    n_entries = len(df_entries_sorted)
    cols = {}
    for name in visit_columns:
        needs_placeholder = n_entries > 0 and name.startswith(transition_prefixes)
        cols[name] = [0] if needs_placeholder else []

    n_feature_i = df_mouse_features.columns.get_loc('Movement_mouse_nose')
    t_feature_i = df_mouse_features.columns.get_loc('Movement_mouse_tail_base')
    body_parts = (('Nose', n_feature_i), ('Tail', t_feature_i))

    for ent_i in range(n_entries):
        nose_arm_entry = df_entries_sorted['Nose Entry Frame'][ent_i]
        nose_arm_exit = df_entries_sorted['Nose Exit Frame'][ent_i]
        tail_arm_entry = df_entries_sorted['Tail Entry Frame'][ent_i]
        tail_arm_exit = df_entries_sorted['Tail Exit Frame'][ent_i]
        duration = df_entries_sorted['Duration'][ent_i]
        entry_latency = df_entries_sorted['Entry Latency'][ent_i]
        exit_latency = df_entries_sorted['Exit Latency'][ent_i]

        for part, feature_i in body_parts:
            # Inside the arm: from tail entry to nose exit.
            arm_mov = get_mov_stats(df_mouse_features, tail_arm_entry, nose_arm_exit, feature_i)
            cols[f'Arm {part} Movement'].append(arm_mov)
            # NOTE(review): not guarded against a zero 'Duration', unlike the
            # other velocities; kept as in the original.
            cols[f'Arm {part} Velocity'].append(arm_mov / duration)

            # Exit latency window: from nose exit to tail exit.
            exit_mov = nonneg(get_mov_stats(df_mouse_features, nose_arm_exit, tail_arm_exit, feature_i))
            cols[f'Exit Latency {part} Movement'].append(exit_mov)
            cols[f'Exit Latency {part} Velocity'].append(rate(exit_mov, exit_latency))

            if ent_i > 0:
                # Entry latency window: from nose entry to tail entry. Each
                # body part is clamped on its own value (the tail clamp used
                # to test the nose value).
                entry_mov = nonneg(get_mov_stats(df_mouse_features, nose_arm_entry, tail_arm_entry, feature_i))
                cols[f'Entry Latency {part} Movement'].append(entry_mov)
                cols[f'Entry Latency {part} Velocity'].append(rate(entry_mov, entry_latency))

        # Nose tortuosity: entry -> max distance, max distance -> exit,
        # and entry -> exit.
        max_dist_frame = df_entries_sorted['Nose Max Dist Frame'][ent_i]
        legs = (('Entry', nose_arm_entry, max_dist_frame),
                ('Exit', max_dist_frame, nose_arm_exit),
                ('Other', nose_arm_entry, nose_arm_exit))
        for label, first, last in legs:
            tortuosity, length, chord = get_tortuosity(df_mouse_features, first, last, n_feature_i, pixel)
            cols[f'{label} Tortuosity'].append(tortuosity)
            cols[f'L {label} Tortuosity'].append(length)
            cols[f'D {label} Tortuosity'].append(chord)

        if ent_i == 0:
            continue

        # Middle window: from the previous visit's tail exit to this visit's
        # nose entry, excluding both latencies.
        m_start = df_entries_sorted['Tail Exit Frame'][ent_i - 1]
        m_end = nose_arm_entry
        m_dur = nonneg((m_end - m_start) / fps)
        cols['Middle Duration'].append(m_dur)
        for part, feature_i in body_parts:
            m_mov = nonneg(get_mov_stats(df_mouse_features, m_start, m_end, feature_i))
            cols[f'Middle {part} Movement'].append(m_mov)
            cols[f'Middle {part} Velocity'].append(rate(m_mov, m_dur))

        # Tortuosity and directionality start at the previous nose exit.
        n_start = df_entries_sorted['Nose Exit Frame'][ent_i - 1]
        tc, lc, distc = get_tortuosity(df_mouse_features, n_start, m_end, n_feature_i, pixel)
        cols['Middle Tortuosity'].append(tc)
        cols['L Middle Tortuosity'].append(lc)
        cols['D Middle Tortuosity'].append(distc)

        coeffs = get_coeff(df_mouse_features, n_start, m_end)
        # The "new" coefficients extend the window up to this tail entry.
        new_coeffs = get_coeff(df_mouse_features, n_start, tail_arm_entry)
        for arm, coeff, new_coeff in zip('ABCDE', coeffs, new_coeffs):
            cols[f'Directionality Coefficiet {arm}'].append(coeff)
            cols[f'New Directionality Coefficiet {arm}'].append(new_coeff)

    other_stats = {'Arm Entry': df_entries_sorted['Arm Entry']}
    other_stats.update(cols)  # keeps the column order of visit_columns

    df_other_stats = pd.DataFrame(other_stats).reset_index(drop=True)
    df_other_stats.to_csv(mouse_id+'_newest_other_stats.csv')
    return df_other_stats


In [35]:
'''
this function will just get the coefficients of the current arm entered and the past two so in ABC at C look at coeff for C and coeff for A
    current coeff  skip arm coeff   current coeff time proportional   skip arm coeff time proportional
A    value for A     value for none
B    value for B     value for none
C    value for C    value for A
D    value for D    value for B
E    value for E    value for C
A
C
'''
def add_lag_dir_coefficients(df_other_stats, df_entries_sorted, mouse_id):
    '''
    Collect the directionality coefficient of the entered arm at lags 0-4.

    For visit ``i`` (from the second visit on) the arm entered is looked up,
    and its coefficient is read from row ``i - lag`` for ``lag`` in 0..4
    whenever that row is at least 1. Each coefficient is also multiplied by
    the time span of that row's transition:

    * plain: 'Middle Duration'[row] + 'Exit Latency'[row - 1]
    * "New": the same plus 'Entry Latency'[row]

    Rows with no value available hold 0.

    Parameters
    ----------
    df_other_stats : pandas.DataFrame
        Must have the arm label as its first column and as 'Arm Entry',
        'Middle Duration', and the 'Directionality Coefficiet A'..'E' and
        'New Directionality Coefficiet A'..'E' columns, each group stored as
        five adjacent columns in A-E order.
    df_entries_sorted : pandas.DataFrame
        Must contain 'Exit Latency' and 'Entry Latency', addressed by label
        0..n-1.
    mouse_id : str
        Prefix of the CSV written: ``<mouse_id>_newest_lagged_coeff.csv``.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``df_other_stats``. Tables with fewer than five
        rows are supported: the zero padding is cut to the table length
        instead of producing columns of mismatched length.
    '''
    entries = df_other_stats.iloc[:, 0]
    n_rows = len(df_other_stats)
    arms = ['A', 'B', 'C', 'D', 'E']
    lags = range(5)  # 0 is the current transition, 1-4 are earlier ones

    indcs = df_other_stats.columns.get_loc('Directionality Coefficiet A')
    indnewcs = df_other_stats.columns.get_loc('New Directionality Coefficiet A')
    middle = df_other_stats['Middle Duration']
    exit_lat = df_entries_sorted['Exit Latency']
    entry_lat = df_entries_sorted['Entry Latency']

    # Lag k has no value for the first k + 1 rows, so pre-fill with zeros.
    coeff = [[0] * (lag + 1) for lag in lags]
    coeff_time = [[0] * (lag + 1) for lag in lags]
    new_coeff = [[0] * (lag + 1) for lag in lags]
    new_coeff_time = [[0] * (lag + 1) for lag in lags]

    for i in range(1, n_rows):
        li = arms.index(df_other_stats['Arm Entry'][i])
        for lag in lags:
            row = i - lag
            if row < 1:
                break  # row 0 only holds placeholders

            c = df_other_stats.iloc[row, indcs + li]
            coeff[lag].append(c)
            coeff_time[lag].append(c * (middle[row] + exit_lat[row - 1]))

            nc = df_other_stats.iloc[row, indnewcs + li]
            new_coeff[lag].append(nc)
            new_coeff_time[lag].append(nc * (middle[row] + exit_lat[row - 1] + entry_lat[row]))

    def column_name(prefix, lag, suffix):
        base = 'Current Coeff' if lag == 0 else f'Lagged {lag} Cur Coeff'
        return prefix + base + suffix

    lagged_coeff = {'Arm Entry': entries}
    for prefix, plain, timed in (('', coeff, coeff_time), ('New ', new_coeff, new_coeff_time)):
        for lag in lags:
            # Slicing drops surplus padding when the table is shorter than it.
            lagged_coeff[column_name(prefix, lag, '')] = plain[lag][:n_rows]
        for lag in lags:
            lagged_coeff[column_name(prefix, lag, ' Time Proportional')] = timed[lag][:n_rows]

    df_lagged_coeff = pd.DataFrame(lagged_coeff).reset_index(drop=True)
    df_lagged_coeff.to_csv(mouse_id+'_newest_lagged_coeff.csv')
    return df_lagged_coeff

In [36]:
def check_args(args):
    '''
    Tell whether the given strings are all different, ignoring case.

    Returns True when no lower-cased value occurs twice (an empty input
    counts as unique) and False as soon as there is a repetition.
    '''
    lowered = [arg.lower() for arg in args]
    # A set keeps one copy of each value, so equal sizes mean no repeats.
    return len(set(lowered)) == len(lowered)

In [37]:
'''
this function will be to return output
'''
def get_output(*args):
    '''
    Length of the longest trailing run of arms that contains no repeat.

    The arguments are checked as ever shorter suffixes (all of them, then
    all but the first, ...) down to the last two; the length of the first
    suffix that ``check_args`` reports as unique is returned.

    Returns
    -------
    int
        The suffix length, or 1 when even the last two arguments repeat.
        Calls with fewer than two arguments also return 1 instead of raising
        ``UnboundLocalError``.
    '''
    total = len(args)
    for skipped in range(total - 1):
        if check_args(args[skipped:]):
            return total - skipped
    return 1


In [38]:
'''
this function will add mouse turns for any possible turn direction and performance interactions
'''
def get_turn(*args):
    '''
    Name the turn made when going from arm ``args[0]`` to arm ``args[1]``.

    Arms are ordered A, B, C, D, E in a ring. The number of steps forward
    from the first arm to the second (wrapping after E) selects the label:
    0 -> 'N', 1 -> '1L', 2 -> '2L', 3 -> '2R', 4 -> '1R'.

    Raises
    ------
    ValueError
        If either arm is not one of 'A'..'E'.
    IndexError
        If fewer than two arms are given.
    '''
    arms = ['A', 'B', 'C', 'D', 'E']
    turns = ['N', '1L', '2L', '2R', '1R']

    origin, destination = args[0], args[1]
    for arm in (origin, destination):
        if arm not in arms:
            raise ValueError(f'unknown arm {arm!r}; expected one of {arms}')

    # Forward distance around the ring of five arms.
    steps = (arms.index(destination) - arms.index(origin)) % len(arms)
    return turns[steps]

In [39]:
'''
this function will set output as the choice level (5,4,3,2, and 1)
A   1      
B   2 
D   3
E   4
C   5
A   5
D   4
B   5
E   5
and will finally create all info needed for the next step - classifier
'''
def add_choice_output(df_entry_stats_ordered, df_other_stats, df_lagged_coeff, mouse_id):
    '''
    Assemble the final per-visit table with turn and choice level.

    For every visit after the first this computes:

    * three combined durations from the previous 'Exit Latency', this
      visit's 'Middle Duration' and this visit's 'Entry Latency';
    * the choice level, ``get_output`` applied to the current arm and up to
      four arms before it;
    * the turn, ``get_turn`` applied to the previous and current arm.

    The first visit gets 0 for the durations, 'N' for the turn and 1 for the
    choice level. The columns of the three input tables are then copied into
    one frame in a fixed order, with 'Turn' and 'Choice Output' last.

    Parameters
    ----------
    df_entry_stats_ordered : pandas.DataFrame
        Visit table; its frame and timing columns are dropped and the timing
        ones re-added later in the output order.
    df_other_stats : pandas.DataFrame
        Movement, velocity and tortuosity columns.
    df_lagged_coeff : pandas.DataFrame
        Current and lagged coefficient columns.
    mouse_id : str
        Prefix of the CSV written:
        ``<mouse_id>_newest_everything_stats.csv``.

    Returns
    -------
    pandas.DataFrame
        The combined table, indexed from 0.

    Notes
    -----
    All three tables are addressed by label 0..n-1 and combined by index, so
    they are expected to share the default index.
    '''
    entries = df_entry_stats_ordered['Arm Entry']
    arm_sequence = entries.tolist()
    n_entries = len(arm_sequence)

    choice_output = []
    turn = []
    p_exitmid_1 = []
    p_midentry_1 = []
    p_exitmidentry_1 = []

    for ind in range(n_entries):
        if ind == 0:
            # The first visit has no previous arm to compare with.
            choice_output.append(1)
            turn.append('N')
            p_exitmid_1.append(0)
            p_midentry_1.append(0)
            p_exitmidentry_1.append(0)
            continue

        exit_prev = df_entry_stats_ordered['Exit Latency'][ind - 1]
        middle = df_other_stats['Middle Duration'][ind]
        entry_cur = df_entry_stats_ordered['Entry Latency'][ind]
        p_exitmid_1.append(np.sum([exit_prev, middle]))
        p_midentry_1.append(np.sum([middle, entry_cur]))
        p_exitmidentry_1.append(np.sum([exit_prev, middle, entry_cur]))

        # Current arm plus up to four earlier ones. The lower bound stops
        # the window from reaching before the first visit.
        window = arm_sequence[max(0, ind - 4):ind + 1]
        choice_output.append(get_output(*window))

        turn.append(get_turn(arm_sequence[ind - 1], arm_sequence[ind]))

    df_everything_stats = df_entry_stats_ordered.drop(columns = ['Tail Entry Frame', 'Tail Exit Frame', 'Duration', 'Entry Latency', 'Exit Latency',
                                                                 'Nose Max Distance', 'Nose Time to Max Dist', 'Tail Base Max Distance', 'Tail Base Time to Max Dist',
                                                                 'Nose Entry Frame', 'Nose Exit Frame', 'Nose Max Dist Frame', 'Tail Base Max Dist Frame'])

    lag_names = ['Current Coeff', 'Lagged 1 Cur Coeff', 'Lagged 2 Cur Coeff',
                 'Lagged 3 Cur Coeff', 'Lagged 4 Cur Coeff']

    # (source table, columns) pairs in the exact output column order.
    column_sources = [
        (df_other_stats, ['Middle Duration', 'Middle Nose Movement', 'Middle Nose Velocity',
                          'Middle Tail Movement', 'Middle Tail Velocity', 'Middle Tortuosity']),
        (df_lagged_coeff, lag_names),
        (df_lagged_coeff, ['New ' + name for name in lag_names]),
        (df_lagged_coeff, [name + ' Time Proportional' for name in lag_names]),
        (df_lagged_coeff, ['New ' + name + ' Time Proportional' for name in lag_names]),
        (df_entry_stats_ordered, ['Entry Latency']),
        (df_other_stats, ['Entry Latency Nose Movement', 'Entry Latency Nose Velocity',
                          'Entry Latency Tail Movement', 'Entry Latency Tail Velocity']),
        (df_entry_stats_ordered, ['Duration', 'Nose Max Distance', 'Nose Time to Max Dist',
                                  'Tail Base Max Distance', 'Tail Base Time to Max Dist']),
        (df_other_stats, ['Arm Nose Movement', 'Arm Nose Velocity',
                          'Arm Tail Movement', 'Arm Tail Velocity',
                          'Entry Tortuosity', 'Exit Tortuosity', 'Other Tortuosity']),
        (df_entry_stats_ordered, ['Exit Latency']),
        (df_other_stats, ['Exit Latency Nose Movement', 'Exit Latency Nose Velocity',
                          'Exit Latency Tail Movement', 'Exit Latency Tail Velocity']),
    ]
    for source, names in column_sources:
        for name in names:
            df_everything_stats[name] = source[name]

    df_everything_stats['ExitMid Duration'] = p_exitmid_1
    df_everything_stats['MidEntry Duration'] = p_midentry_1
    df_everything_stats['ExitMidEntry Duration'] = p_exitmidentry_1

    df_everything_stats['Turn'] = turn
    df_everything_stats['Choice Output'] = choice_output
    df_everything_stats = df_everything_stats.reset_index(drop=True)
    df_everything_stats.to_csv(mouse_id+'_newest_everything_stats.csv')

    return df_everything_stats

In [40]:
def scale_it(df_everything_stats, mouse_id):
    '''
    Min-max scale the feature columns of a stats table and save the result.

    Every column except the first one and the last two is passed through
    sklearn's MinMaxScaler (fitted on this table only); the excluded columns
    are copied through unchanged. The input frame is not modified.

    Parameters:
        df_everything_stats: pandas DataFrame to scale.
        mouse_id: prefix of the output file name.

    Returns:
        A scaled copy of the input. The same frame is also written to
        mouse_id + '_newest_mm_everything_stats.csv' (index included).
    '''
    from sklearn.preprocessing import MinMaxScaler

    # Positional slice: skip column 0 and the two trailing columns.
    feature_cols = df_everything_stats.columns[1:-2]

    scaler = MinMaxScaler(copy=True, clip=False)
    scaled_values = scaler.fit_transform(df_everything_stats[feature_cols])

    scaled_df = df_everything_stats.copy()
    scaled_df[feature_cols] = scaled_values

    out_path = mouse_id + '_newest_mm_everything_stats.csv'
    scaled_df.to_csv(out_path)
    return scaled_df
    

In [41]:
def lagging_four_prev_choices(df_pls, mouse_id):
    '''
    Create lagged copies of every per-choice feature for the previous four rows.

    For each source column in the table below, the output keeps the column
    itself followed by four columns named "lagged_<i>_<suffix>" (i = 1..4)
    that hold the same column shifted down by i rows. Rows containing a null
    in any selected column are then dropped, which includes the leading rows
    for which a lag does not exist.

    Parameters:
        df_pls: polars DataFrame containing every source column listed below.
        mouse_id: prefix of the output file name.

    Returns:
        The lagged polars DataFrame. It is also written to
        mouse_id + '_newest_lagged_mm_everything_stats.csv'.
    '''
    # (source column, suffix used in the "lagged_<i>_<suffix>" alias).
    # The order here is the column order of the output.
    column_suffixes = [
        ("Middle Duration", "mid_duration"),
        ("Middle Nose Movement", "mid_nose_mov"),
        ("Middle Nose Velocity", "mid_nose_vel"),
        ("Middle Tail Movement", "mid_tail_mov"),
        ("Middle Tail Velocity", "mid_tail_vel"),
        ("Middle Tortuosity", "mid_tortuosity"),

        ("Current Coeff", "current_coeff"),
        ("Lagged 1 Cur Coeff", "lagged_1_cur_coeff"),
        ("Lagged 2 Cur Coeff", "lagged_2_cur_coeff"),
        ("Lagged 3 Cur Coeff", "lagged_3_cur_coeff"),
        ("Lagged 4 Cur Coeff", "lagged_4_cur_coeff"),

        ("New Current Coeff", "new_current_coeff"),
        ("New Lagged 1 Cur Coeff", "new_lagged_1_cur_coeff"),
        ("New Lagged 2 Cur Coeff", "new_lagged_2_cur_coeff"),
        ("New Lagged 3 Cur Coeff", "new_lagged_3_cur_coeff"),
        ("New Lagged 4 Cur Coeff", "new_lagged_4_cur_coeff"),

        ("Current Coeff Time Proportional", "current_coeff_time_prop"),
        ("Lagged 1 Cur Coeff Time Proportional", "lagged_1_cur_coeff_time_prop"),
        ("Lagged 2 Cur Coeff Time Proportional", "lagged_2_cur_coeff_time_prop"),
        ("Lagged 3 Cur Coeff Time Proportional", "lagged_3_cur_coeff_time_prop"),
        ("Lagged 4 Cur Coeff Time Proportional", "lagged_4_cur_coeff_time_prop"),

        ("New Current Coeff Time Proportional", "new_current_coeff_time_prop"),
        ("New Lagged 1 Cur Coeff Time Proportional", "new_lagged_1_cur_coeff_time_prop"),
        ("New Lagged 2 Cur Coeff Time Proportional", "new_lagged_2_cur_coeff_time_prop"),
        ("New Lagged 3 Cur Coeff Time Proportional", "new_lagged_3_cur_coeff_time_prop"),
        ("New Lagged 4 Cur Coeff Time Proportional", "new_lagged_4_cur_coeff_time_prop"),

        ("Entry Latency", "entry_lat"),
        ("Entry Latency Nose Movement", "entry_lat_nose_mov"),
        ("Entry Latency Nose Velocity", "entry_lat_nose_vel"),
        ("Entry Latency Tail Movement", "entry_lat_tail_mov"),
        ("Entry Latency Tail Velocity", "entry_lat_tail_vel"),

        ("Duration", "duration"),
        ("Nose Max Distance", "nose_max_distance"),
        ("Nose Time to Max Dist", "nose_time_to_max_dist"),
        ("Tail Base Max Distance", "tail_max_distance"),
        ("Tail Base Time to Max Dist", "tail_time_to_max_dist"),
        ("Arm Nose Movement", "arm_nose_mov"),
        ("Arm Nose Velocity", "arm_nose_vel"),
        ("Arm Tail Movement", "arm_tail_mov"),
        ("Arm Tail Velocity", "arm_tail_vel"),
        ("Entry Tortuosity", "entry_tortuosity"),
        ("Exit Tortuosity", "exit_tortuosity"),
        ("Other Tortuosity", "other_tortuosity"),

        ("Exit Latency", "exit_lat"),
        ("Exit Latency Nose Movement", "exit_lat_nose_mov"),
        ("Exit Latency Nose Velocity", "exit_lat_nose_vel"),
        ("Exit Latency Tail Movement", "exit_lat_tail_mov"),
        ("Exit Latency Tail Velocity", "exit_lat_tail_vel"),

        ("ExitMid Duration", "exitmid_duration"),
        ("MidEntry Duration", "midentry_duration"),
        ("ExitMidEntry Duration", "exitmidentry_duration"),

        ("Turn", "turn_ago"),
        ("Choice Output", "choice_ago"),
    ]
    lags = [1, 2, 3, 4]

    # Each source column is followed immediately by its four lagged versions.
    selection = []
    for source_col, suffix in column_suffixes:
        selection.append(source_col)
        for lag in lags:
            selection.append(
                pl.col(source_col).shift(lag).alias(f"lagged_{lag}_{suffix}")
            )

    lagged = df_pls.select(*selection).drop_nulls()

    lagged.write_csv(mouse_id + '_newest_lagged_mm_everything_stats.csv')
    return lagged

In [42]:
def added_time_means(lagged_df_everything_stats, mouse_id):
    '''
    Add cumulative sums and means of the duration columns over recent rows' lags.

    For each duration family below, a window of size k uses the first k
    columns of that family (the un-lagged column first, then lagged_1,
    lagged_2, ...), and two columns are added per window:
        "<k>_<label>_duration"       sum of the k columns
        "mean_<k>_<label>_duration"  that sum divided by k

    Families (label: columns, window sizes):
        cur_arm:           Duration + lagged_1..4_duration, k = 2..5
        past_arm:          lagged_1..4_duration only, k = 2..4
        past_mid:          Middle Duration + lagged_1..4_mid_duration, k = 2..5
        past_exitmid:      ExitMid Duration + lagged_1..4_exitmid_duration, k = 2..5
        past_midentry:     MidEntry Duration + lagged_1..4_midentry_duration, k = 2..5
        past_exitmidentry: ExitMidEntry Duration + lagged_1..4_exitmidentry_duration, k = 2..5

    The five choice columns ('Choice Output', 'lagged_1..4_choice_ago') are
    moved to the end of the table, after the added columns.

    The computation is column-wise, so it does not depend on the input having
    a default 0..n-1 index; the returned frame always has a fresh 0..n-1 index.

    Parameters:
        lagged_df_everything_stats: pandas DataFrame containing the duration
            and choice columns named above. It is not modified.
        mouse_id: prefix of the output file name.

    Returns:
        The extended DataFrame. It is also written to
        mouse_id + '_newest_lagged_added_stats.csv' (index included).
    '''
    choice_cols = ['Choice Output', 'lagged_1_choice_ago', 'lagged_2_choice_ago',
                   'lagged_3_choice_ago', 'lagged_4_choice_ago']
    features = lagged_df_everything_stats.drop(columns=choice_cols)

    def lag_cols(suffix):
        # Names of the four lagged columns of one duration family.
        return [f'lagged_{lag}_{suffix}' for lag in (1, 2, 3, 4)]

    # (output label, source columns in summation order). Order fixes the output column order.
    families = [
        ('cur_arm', ['Duration'] + lag_cols('duration')),
        ('past_arm', lag_cols('duration')),
        ('past_mid', ['Middle Duration'] + lag_cols('mid_duration')),
        ('past_exitmid', ['ExitMid Duration'] + lag_cols('exitmid_duration')),
        ('past_midentry', ['MidEntry Duration'] + lag_cols('midentry_duration')),
        ('past_exitmidentry', ['ExitMidEntry Duration'] + lag_cols('exitmidentry_duration')),
    ]

    added = {}
    for label, source_cols in families:
        # Running left-to-right total; after adding the k-th column it is the size-k sum.
        window_totals = {}
        running = features[source_cols[0]]
        for size, col in enumerate(source_cols[1:], start=2):
            running = running + features[col]
            window_totals[size] = running

        # All sums of a family come first, then all of its means.
        for size, total in window_totals.items():
            added[f'{size}_{label}_duration'] = total
        for size, total in window_totals.items():
            added[f'mean_{size}_{label}_duration'] = total / size

    # Assemble in one concat instead of inserting ~50 columns one at a time.
    df_lagged_added_stats = pd.concat(
        [
            features,
            pd.DataFrame(added, index=features.index),
            lagged_df_everything_stats[choice_cols],
        ],
        axis=1,
    )
    df_lagged_added_stats = df_lagged_added_stats.reset_index(drop=True)
    df_lagged_added_stats.to_csv(mouse_id + '_newest_lagged_added_stats.csv')
    return df_lagged_added_stats

# All Functions organized here

In [48]:
# Read each mouse's '<mouse_id>_boolean.csv' into its own pandas DataFrame.
# These frames are passed as the first argument to valid_entries_stats further down.
# NOTE(review): presumably per-frame 0/1 flags (the helper functions compare cells to 0 and 1) - confirm.
df_mouse_boolean_sa325m1 = pd.read_csv('sa325m1_boolean.csv')
df_mouse_boolean_sa325m2 = pd.read_csv('sa325m2_boolean.csv')
df_mouse_boolean_sa325m3 = pd.read_csv('sa325m3_boolean.csv')
df_mouse_boolean_sa325m4 = pd.read_csv('sa325m4_boolean.csv')
df_mouse_boolean_sa498m2 = pd.read_csv('sa498m2_boolean.csv')
df_mouse_boolean_sa498m3 = pd.read_csv('sa498m3_boolean.csv')
df_mouse_boolean_sa499m1 = pd.read_csv('sa499m1_boolean.csv')
df_mouse_boolean_sa499m2 = pd.read_csv('sa499m2_boolean.csv')
df_mouse_boolean_sa499m3 = pd.read_csv('sa499m3_boolean.csv')

In [49]:
# Read each mouse's '<mouse_id>_features.csv' into its own pandas DataFrame.
# These frames are passed as the second argument to all_other_stats further down.
df_mouse_features_sa325m1 = pd.read_csv('sa325m1_features.csv')
df_mouse_features_sa325m2 = pd.read_csv('sa325m2_features.csv')
df_mouse_features_sa325m3 = pd.read_csv('sa325m3_features.csv')
df_mouse_features_sa325m4 = pd.read_csv('sa325m4_features.csv')
df_mouse_features_sa498m2 = pd.read_csv('sa498m2_features.csv')
df_mouse_features_sa498m3 = pd.read_csv('sa498m3_features.csv')
df_mouse_features_sa499m1 = pd.read_csv('sa499m1_features.csv')
df_mouse_features_sa499m2 = pd.read_csv('sa499m2_features.csv')
df_mouse_features_sa499m3 = pd.read_csv('sa499m3_features.csv')


In [50]:
# Read each mouse's '<mouse_id>_dist_roi.csv' into its own pandas DataFrame.
# These frames are passed as the second argument to valid_entries_stats further down.
df_mouse_dist_roi_sa325m1 = pd.read_csv('sa325m1_dist_roi.csv')
df_mouse_dist_roi_sa325m2 = pd.read_csv('sa325m2_dist_roi.csv')
df_mouse_dist_roi_sa325m3 = pd.read_csv('sa325m3_dist_roi.csv')
df_mouse_dist_roi_sa325m4 = pd.read_csv('sa325m4_dist_roi.csv')
df_mouse_dist_roi_sa498m2 = pd.read_csv('sa498m2_dist_roi.csv')
df_mouse_dist_roi_sa498m3 = pd.read_csv('sa498m3_dist_roi.csv')
df_mouse_dist_roi_sa499m1 = pd.read_csv('sa499m1_dist_roi.csv')
df_mouse_dist_roi_sa499m2 = pd.read_csv('sa499m2_dist_roi.csv')
df_mouse_dist_roi_sa499m3 = pd.read_csv('sa499m3_dist_roi.csv')


In [51]:
# Pipeline step 1: run valid_entries_stats once per mouse on its boolean and dist_roi tables.
# Each call returns two objects, unpacked into df_entries_<id> and df_sorted_entries_<id>;
# only the df_sorted_entries_<id> frames are consumed by the cells that follow.
# The third argument ('<id>_new') is the tag handed to the function for this run.
df_entries_sa325m1, df_sorted_entries_sa325m1 = valid_entries_stats(df_mouse_boolean_sa325m1, df_mouse_dist_roi_sa325m1, 'sa325m1_new')
df_entries_sa325m2, df_sorted_entries_sa325m2 = valid_entries_stats(df_mouse_boolean_sa325m2, df_mouse_dist_roi_sa325m2, 'sa325m2_new')
df_entries_sa325m3, df_sorted_entries_sa325m3 = valid_entries_stats(df_mouse_boolean_sa325m3, df_mouse_dist_roi_sa325m3, 'sa325m3_new')
df_entries_sa325m4, df_sorted_entries_sa325m4 = valid_entries_stats(df_mouse_boolean_sa325m4, df_mouse_dist_roi_sa325m4, 'sa325m4_new')
df_entries_sa498m2, df_sorted_entries_sa498m2 = valid_entries_stats(df_mouse_boolean_sa498m2, df_mouse_dist_roi_sa498m2, 'sa498m2_new')
df_entries_sa498m3, df_sorted_entries_sa498m3 = valid_entries_stats(df_mouse_boolean_sa498m3, df_mouse_dist_roi_sa498m3, 'sa498m3_new')
df_entries_sa499m1, df_sorted_entries_sa499m1 = valid_entries_stats(df_mouse_boolean_sa499m1, df_mouse_dist_roi_sa499m1, 'sa499m1_new')
df_entries_sa499m2, df_sorted_entries_sa499m2 = valid_entries_stats(df_mouse_boolean_sa499m2, df_mouse_dist_roi_sa499m2, 'sa499m2_new')
df_entries_sa499m3, df_sorted_entries_sa499m3 = valid_entries_stats(df_mouse_boolean_sa499m3, df_mouse_dist_roi_sa499m3, 'sa499m3_new')


In [52]:
# Pipeline step 2: run all_other_stats once per mouse on its sorted-entries and features tables.
# NOTE(review): the same literal 0.99045 is passed as the fourth argument for every mouse; its
# meaning is defined by all_other_stats - consider naming it as a constant in a config cell.
df_other_stats_sa325m1 = all_other_stats(df_sorted_entries_sa325m1, df_mouse_features_sa325m1, 'sa325m1_new', 0.99045)
df_other_stats_sa325m2 = all_other_stats(df_sorted_entries_sa325m2, df_mouse_features_sa325m2, 'sa325m2_new',0.99045)
df_other_stats_sa325m3 = all_other_stats(df_sorted_entries_sa325m3, df_mouse_features_sa325m3, 'sa325m3_new',0.99045)
df_other_stats_sa325m4 = all_other_stats(df_sorted_entries_sa325m4, df_mouse_features_sa325m4, 'sa325m4_new',0.99045)
df_other_stats_sa498m2 = all_other_stats(df_sorted_entries_sa498m2, df_mouse_features_sa498m2, 'sa498m2_new',0.99045)
df_other_stats_sa498m3 = all_other_stats(df_sorted_entries_sa498m3, df_mouse_features_sa498m3, 'sa498m3_new',0.99045)
df_other_stats_sa499m1 = all_other_stats(df_sorted_entries_sa499m1, df_mouse_features_sa499m1, 'sa499m1_new',0.99045)
df_other_stats_sa499m2 = all_other_stats(df_sorted_entries_sa499m2, df_mouse_features_sa499m2, 'sa499m2_new',0.99045)
df_other_stats_sa499m3 = all_other_stats(df_sorted_entries_sa499m3, df_mouse_features_sa499m3, 'sa499m3_new',0.99045)


In [53]:
# Pipeline step 3: run add_lag_dir_coefficients once per mouse, combining that mouse's
# df_other_stats_<id> (step 2) with its df_sorted_entries_<id> (step 1).
df_lagged_coeff_sa325m1 = add_lag_dir_coefficients(df_other_stats_sa325m1, df_sorted_entries_sa325m1, 'sa325m1_new')
df_lagged_coeff_sa325m2 = add_lag_dir_coefficients(df_other_stats_sa325m2, df_sorted_entries_sa325m2, 'sa325m2_new')
df_lagged_coeff_sa325m3 = add_lag_dir_coefficients(df_other_stats_sa325m3, df_sorted_entries_sa325m3, 'sa325m3_new')
df_lagged_coeff_sa325m4 = add_lag_dir_coefficients(df_other_stats_sa325m4, df_sorted_entries_sa325m4, 'sa325m4_new')
df_lagged_coeff_sa498m2 = add_lag_dir_coefficients(df_other_stats_sa498m2, df_sorted_entries_sa498m2, 'sa498m2_new')
df_lagged_coeff_sa498m3 = add_lag_dir_coefficients(df_other_stats_sa498m3, df_sorted_entries_sa498m3, 'sa498m3_new')
df_lagged_coeff_sa499m1 = add_lag_dir_coefficients(df_other_stats_sa499m1, df_sorted_entries_sa499m1, 'sa499m1_new')
df_lagged_coeff_sa499m2 = add_lag_dir_coefficients(df_other_stats_sa499m2, df_sorted_entries_sa499m2, 'sa499m2_new')
df_lagged_coeff_sa499m3 = add_lag_dir_coefficients(df_other_stats_sa499m3, df_sorted_entries_sa499m3, 'sa499m3_new')


In [54]:
# Pipeline step 4: run add_choice_output once per mouse on the outputs of steps 1-3
# (sorted entries, other stats, lagged coefficients). The resulting df_everything_stats_<id>
# frames are the inputs to scale_it in the next cell.
df_everything_stats_sa325m1 = add_choice_output(df_sorted_entries_sa325m1, df_other_stats_sa325m1, df_lagged_coeff_sa325m1, 'sa325m1_new')
df_everything_stats_sa325m2 = add_choice_output(df_sorted_entries_sa325m2, df_other_stats_sa325m2, df_lagged_coeff_sa325m2, 'sa325m2_new')
df_everything_stats_sa325m3 = add_choice_output(df_sorted_entries_sa325m3, df_other_stats_sa325m3, df_lagged_coeff_sa325m3, 'sa325m3_new')
df_everything_stats_sa325m4 = add_choice_output(df_sorted_entries_sa325m4, df_other_stats_sa325m4, df_lagged_coeff_sa325m4, 'sa325m4_new')
df_everything_stats_sa498m2 = add_choice_output(df_sorted_entries_sa498m2, df_other_stats_sa498m2, df_lagged_coeff_sa498m2, 'sa498m2_new')
df_everything_stats_sa498m3 = add_choice_output(df_sorted_entries_sa498m3, df_other_stats_sa498m3, df_lagged_coeff_sa498m3, 'sa498m3_new')
df_everything_stats_sa499m1 = add_choice_output(df_sorted_entries_sa499m1, df_other_stats_sa499m1, df_lagged_coeff_sa499m1, 'sa499m1_new')
df_everything_stats_sa499m2 = add_choice_output(df_sorted_entries_sa499m2, df_other_stats_sa499m2, df_lagged_coeff_sa499m2, 'sa499m2_new')
df_everything_stats_sa499m3 = add_choice_output(df_sorted_entries_sa499m3, df_other_stats_sa499m3, df_lagged_coeff_sa499m3, 'sa499m3_new')


In [55]:
# Pipeline step 5: min-max scale each mouse's table with scale_it.
# Side effect: each call also writes '<id>_new_newest_mm_everything_stats.csv' to the
# working directory; the next cell reads those files back.
mm_everything_stats_sa325m1 = scale_it(df_everything_stats_sa325m1, 'sa325m1_new')
mm_everything_stats_sa325m2 = scale_it(df_everything_stats_sa325m2, 'sa325m2_new')
mm_everything_stats_sa325m3 = scale_it(df_everything_stats_sa325m3, 'sa325m3_new')
mm_everything_stats_sa325m4 = scale_it(df_everything_stats_sa325m4, 'sa325m4_new')
mm_everything_stats_sa498m2 = scale_it(df_everything_stats_sa498m2, 'sa498m2_new')
mm_everything_stats_sa498m3 = scale_it(df_everything_stats_sa498m3, 'sa498m3_new')
mm_everything_stats_sa499m1 = scale_it(df_everything_stats_sa499m1, 'sa499m1_new')
mm_everything_stats_sa499m2 = scale_it(df_everything_stats_sa499m2, 'sa499m2_new')
mm_everything_stats_sa499m3 = scale_it(df_everything_stats_sa499m3, 'sa499m3_new')


In [56]:
# Re-load the scaled tables as polars DataFrames from the CSV files that scale_it wrote
# in the previous cell (pandas -> polars hand-off goes through disk, so that cell must
# have been run first).
mm_pls_sa325m1 = pl.read_csv('sa325m1_new_newest_mm_everything_stats.csv')
mm_pls_sa325m2 = pl.read_csv('sa325m2_new_newest_mm_everything_stats.csv')
mm_pls_sa325m3 = pl.read_csv('sa325m3_new_newest_mm_everything_stats.csv')
mm_pls_sa325m4 = pl.read_csv('sa325m4_new_newest_mm_everything_stats.csv')
mm_pls_sa498m2 = pl.read_csv('sa498m2_new_newest_mm_everything_stats.csv')
mm_pls_sa498m3 = pl.read_csv('sa498m3_new_newest_mm_everything_stats.csv')
mm_pls_sa499m1 = pl.read_csv('sa499m1_new_newest_mm_everything_stats.csv')
mm_pls_sa499m2 = pl.read_csv('sa499m2_new_newest_mm_everything_stats.csv')
mm_pls_sa499m3 = pl.read_csv('sa499m3_new_newest_mm_everything_stats.csv')


In [57]:
# Pipeline step 6: add the lagged_1..4 columns to each mouse's scaled polars table.
# Side effect: each call also writes '<id>_new_newest_lagged_mm_everything_stats.csv';
# the next cell reads those files back with pandas.
lagged_mm_everything_stats_sa325m1 = lagging_four_prev_choices(mm_pls_sa325m1, 'sa325m1_new')
lagged_mm_everything_stats_sa325m2 = lagging_four_prev_choices(mm_pls_sa325m2, 'sa325m2_new')
lagged_mm_everything_stats_sa325m3 = lagging_four_prev_choices(mm_pls_sa325m3, 'sa325m3_new')
lagged_mm_everything_stats_sa325m4 = lagging_four_prev_choices(mm_pls_sa325m4, 'sa325m4_new')
lagged_mm_everything_stats_sa498m2 = lagging_four_prev_choices(mm_pls_sa498m2, 'sa498m2_new')
lagged_mm_everything_stats_sa498m3 = lagging_four_prev_choices(mm_pls_sa498m3, 'sa498m3_new')
lagged_mm_everything_stats_sa499m1 = lagging_four_prev_choices(mm_pls_sa499m1, 'sa499m1_new')
lagged_mm_everything_stats_sa499m2 = lagging_four_prev_choices(mm_pls_sa499m2, 'sa499m2_new')
lagged_mm_everything_stats_sa499m3  = lagging_four_prev_choices(mm_pls_sa499m3, 'sa499m3_new')


In [58]:
# Re-load the lagged tables as pandas DataFrames from the CSV files that
# lagging_four_prev_choices wrote in the previous cell (polars -> pandas hand-off
# goes through disk, so that cell must have been run first).
df_lagged_mm_everything_stats_sa325m1 = pd.read_csv('sa325m1_new_newest_lagged_mm_everything_stats.csv')
df_lagged_mm_everything_stats_sa325m2 = pd.read_csv('sa325m2_new_newest_lagged_mm_everything_stats.csv')
df_lagged_mm_everything_stats_sa325m3 = pd.read_csv('sa325m3_new_newest_lagged_mm_everything_stats.csv')
df_lagged_mm_everything_stats_sa325m4 = pd.read_csv('sa325m4_new_newest_lagged_mm_everything_stats.csv')
df_lagged_mm_everything_stats_sa498m2 = pd.read_csv('sa498m2_new_newest_lagged_mm_everything_stats.csv')
df_lagged_mm_everything_stats_sa498m3 = pd.read_csv('sa498m3_new_newest_lagged_mm_everything_stats.csv')
df_lagged_mm_everything_stats_sa499m1 = pd.read_csv('sa499m1_new_newest_lagged_mm_everything_stats.csv')
df_lagged_mm_everything_stats_sa499m2 = pd.read_csv('sa499m2_new_newest_lagged_mm_everything_stats.csv')
df_lagged_mm_everything_stats_sa499m3 = pd.read_csv('sa499m3_new_newest_lagged_mm_everything_stats.csv')


In [59]:
# Pipeline step 7: add the windowed duration sums and means to each mouse's lagged table.
# Side effect: each call also writes '<id>_new_newest_lagged_added_stats.csv'.
df_lagged_added_stats_sa325m1 = added_time_means(df_lagged_mm_everything_stats_sa325m1, 'sa325m1_new')
df_lagged_added_stats_sa325m2 = added_time_means(df_lagged_mm_everything_stats_sa325m2, 'sa325m2_new')
df_lagged_added_stats_sa325m3 = added_time_means(df_lagged_mm_everything_stats_sa325m3, 'sa325m3_new')
df_lagged_added_stats_sa325m4 = added_time_means(df_lagged_mm_everything_stats_sa325m4, 'sa325m4_new')
df_lagged_added_stats_sa498m2 = added_time_means(df_lagged_mm_everything_stats_sa498m2, 'sa498m2_new')
df_lagged_added_stats_sa498m3 = added_time_means(df_lagged_mm_everything_stats_sa498m3, 'sa498m3_new')
df_lagged_added_stats_sa499m1 = added_time_means(df_lagged_mm_everything_stats_sa499m1, 'sa499m1_new')
df_lagged_added_stats_sa499m2 = added_time_means(df_lagged_mm_everything_stats_sa499m2, 'sa499m2_new')
df_lagged_added_stats_sa499m3 = added_time_means(df_lagged_mm_everything_stats_sa499m3, 'sa499m3_new')


In [None]:
# IDs of the nine mice processed by the cells above (they appear in the CSV file names
# and variable suffixes). Kept as strings in a list: the bare identifiers that were here
# are undefined names, so running the cell raised NameError.
MOUSE_IDS = [
    'sa325m1',
    'sa325m2',
    'sa325m3',
    'sa325m4',
    'sa498m2',
    'sa498m3',
    'sa499m1',
    'sa499m2',
    'sa499m3',
]



