# Building the design matrix for the GLM on FlexiVexi data 

We want a series of variables:
- Cue identity: 0 or 1
- History of the last 5 choices
- Last rewarded choice
- Distance from the dot to port 0
- Distance from the dot to port 1

They will form a design matrix with one row per trial and one column per variable.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import GoalSelection.training_metrics as tm
from pathlib import Path

## Data extraction

In [None]:
# Root folder holding the behavioural data (machine-specific absolute path).
DATA = Path('/Volumes/sjones/projects/FlexiVexi/behavioural_data')
# Session to analyse: mouse identifier and session date, both handed to
# tm.build_exp_data below.
MOUSE = 'FNT103'
DATE = '2024-08-02'
# [x, y] coordinates of each port; the list position is the port index used
# by distance_to_port, which compares them with DotXLocation/DotYLocation.
# Only ports 0 and 1 are used by the design matrix built in this notebook.
PORTS = [[0.6, 0.35], 
         [-0.6, 0.35], 
         [0, -0.7]]

# Load the trial table for this mouse/date. It is used below as a pandas
# DataFrame with one row per trial (TODO confirm against tm.build_exp_data).
exp_data = tm.build_exp_data(DATA, MOUSE, DATE)

In [None]:
# Show the loaded session table as the cell output for a quick visual check.
exp_data

We will:
- Get rid of aborted trials
- Get rid of the first 5 trials
- Assume that the last-5-trials history can be computed ignoring aborted trials, even though there are prolonged periods of time without many trials.

In [None]:
def filter_data(exp_data):
    '''
    Drop aborted trials from the session table.

    Keeps every row whose 'TrialCompletionCode' does not start with
    'Aborted'. The original index labels are preserved, so code that steps
    back through previous trials must use positional (iloc) lookups.

    Note: only the aborted-trial filter happens here. The first 5 trials are
    skipped later, in build_design_matrix, and the choice history is
    computed over the remaining (non-aborted) rows only, even when there
    are long stretches of time with few completed trials.
    '''
    aborted = exp_data['TrialCompletionCode'].str.startswith('Aborted')
    kept_trials = exp_data.loc[~aborted]
    return kept_trials

# Keep only the non-aborted trials; every later step works on this table.
filtered_data = filter_data(exp_data)

And let's look at the variables one by one

In [None]:
def get_cue(row):
    '''
    Translate the trial's 'AudioCueIdentity' into a binary cue regressor.

    Returns 1 for identity 10 and 0 for identity 14. Any other value prints
    a warning and yields None rather than raising.
    '''
    identity = row['AudioCueIdentity']
    if identity == 10:
        return 1
    if identity == 14:
        return 0

    # Unknown sound: report it but let the caller carry on with a missing value.
    print('UNRECOGNISED SOUND CUE')
    return None

def distance_to_port(row, port):
    '''
    Euclidean distance between the trial's dot and one of the ports.

    row:  a trial record providing 'DotXLocation' and 'DotYLocation'.
    port: index into the module-level PORTS list of [x, y] coordinates.
    '''
    dot_xy = np.asarray([row['DotXLocation'], row['DotYLocation']])
    offset = np.asarray(PORTS[port]) - dot_xy
    dx, dy = offset
    return np.sqrt((dx**2) + (dy**2))

def get_last_rewarded(iloc, filtered_data):
    '''
    Find the most recent rewarded trial strictly before position `iloc`.

    iloc:          positional (not label) index of the current trial.
    filtered_data: trial table with a string 'TrialCompletionCode' column.

    Returns the last character of that trial's completion code
    (presumably the chosen port identifier - TODO confirm the code format).

    Raises ValueError if `iloc` is not in 1..len(filtered_data), or if no
    earlier trial has a completion code starting with 'Rewarded'.
    '''
    if not 0 < iloc <= len(filtered_data):
        raise ValueError("Invalid iloc value")

    # Walk backwards from the previous trial down to the first row.
    for position in range(iloc - 1, -1, -1):
        code = filtered_data.iloc[position]['TrialCompletionCode']
        if code.startswith('Rewarded'):
            return code[-1]

    raise ValueError("No previous 'Rewarded' TrialCompletionCode found")

def get_last(position, iloc, filtered_data):
    '''
    Return the choice made `position` trials before the trial at `iloc`.

    position:      how many rows to step back (1 = previous trial).
    iloc:          positional (not label) index of the current trial.
    filtered_data: trial table with a string 'TrialCompletionCode' column.

    Returns the last character of the earlier trial's completion code
    (presumably the chosen port identifier - TODO confirm the code format).

    Raises IndexError if stepping back would go before the first row.
    Without this check a negative positional index would silently wrap
    around to the END of the table and return a future trial's choice as
    if it were history.
    '''
    target = iloc - position
    if target < 0:
        raise IndexError(
            f"Cannot look back {position} trial(s) from position {iloc}: "
            "not enough previous trials"
        )
    past_choice = filtered_data.iloc[target]['TrialCompletionCode'][-1]
    return past_choice




In [None]:
def build_design_matrix(filtered_data, n_history=5):
    '''
    Build the GLM design matrix, one row per trial.

    filtered_data: trial table (aborted trials already removed) providing
                   'TrialNumber', 'AudioCueIdentity', 'DotXLocation',
                   'DotYLocation' and 'TrialCompletionCode'.
    n_history:     number of previous choices to include as regressors.
                   The first `n_history` trials are skipped because they do
                   not have a full history. Defaults to 5.

    Returns a DataFrame with columns, in order:
        trial, cue, distance_0, distance_1, last_rewarded,
        last_1 ... last_<n_history>

    Raises ValueError if n_history < 1, and propagates the ValueError from
    get_last_rewarded when a trial has no earlier rewarded trial.
    '''
    if n_history < 1:
        raise ValueError("n_history must be at least 1")

    history_names = [f'last_{lag}' for lag in range(1, n_history + 1)]
    column_names = ['trial', 'cue', 'distance_0', 'distance_1',
                    'last_rewarded', *history_names]
    # Dict of lists (not a list of records) so that an empty input still
    # yields a DataFrame carrying every column.
    columns = {name: [] for name in column_names}

    # `position` is the purely positional index into filtered_data. The
    # pandas index label is ignored because dropping aborted trials leaves
    # gaps in it, so only positions can be used to step back through rows.
    trials = filtered_data.iloc[n_history:].iterrows()
    for position, (_, row) in enumerate(trials, start=n_history):
        columns['trial'].append(row['TrialNumber'])
        columns['cue'].append(get_cue(row))
        columns['distance_0'].append(distance_to_port(row, 0))
        columns['distance_1'].append(distance_to_port(row, 1))
        columns['last_rewarded'].append(
            get_last_rewarded(position, filtered_data))
        for lag, name in enumerate(history_names, start=1):
            columns[name].append(get_last(lag, position, filtered_data))

    return pd.DataFrame(columns)

# Design matrix for the GLM: one row per kept trial, one column per regressor.
x = build_design_matrix(filtered_data)