# Building the design matrix for the GLM on FlexiVexi data 

We want a series of variables:
- Cue identity: 0 or 1
- History  of last 5 choices
- Last rewarded choice
- Distance to 0
- Distance to 1

They will form the design matrix: one row per trial and one column per variable (trials x variables).

## Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import GoalSelection.training_metrics as tm
from pathlib import Path
import glmhmm.glm as glm
import FlexiVexi_glm.design_matrix as dm

## Data extraction

In [None]:
# Configuration for the single-session walkthrough below.
# NOTE(review): DATA is not referenced anywhere else in the visible notebook
# (tm.build_exp_data is called without it) - confirm whether it is still needed.
DATA = Path('/Volumes/sjones/projects/FlexiVexi/behavioural_data')
MOUSE = 'FNT103'
DATE = '2024-08-02'
# One [x, y] pair per port. distance_to_port() indexes this list by port
# number and compares the pair against DotXLocation / DotYLocation.
PORTS = [[0.6, 0.35], 
         [-0.6, 0.35], 
         [0, -0.7]]

# Load the behavioural data for this mouse and session date; it is used
# below as a pandas DataFrame.
exp_data = tm.build_exp_data(MOUSE, DATE)

In [None]:
# Display the loaded session data.
exp_data

We will: 
- Get rid of aborted trials
- Get rid of the first 5 trials
- Assume that the last-5-trials history can be computed ignoring aborted trials, even though there are prolonged periods of time without many trials.

In [None]:
def filter_data(exp_data):
    '''
    Return only the trials that were not aborted.

    A trial counts as aborted when its 'TrialCompletionCode' starts with
    'Aborted'. Nothing else is removed here: the first 5 trials are
    skipped later, when the design matrix is built.

    The trial-history predictors are computed over the rows returned
    here, i.e. ignoring aborted trials, even though there can be
    prolonged periods of time without many trials.
    '''
    completion_codes = exp_data['TrialCompletionCode']
    was_aborted = completion_codes.str.startswith('Aborted')
    return exp_data.loc[~was_aborted]

# Keep only the non-aborted trials; the cells below work on this table.
filtered_data = filter_data(exp_data)

And let's look at the variables one by one

In [None]:
def get_cue(row):
    '''
    Map a trial's audio cue onto the binary cue predictor.

    An 'AudioCueIdentity' of 10 gives 1 and 14 gives 0. Any other value
    prints a warning and gives None.
    '''
    if row['AudioCueIdentity'] == 10:
        return 1
    if row['AudioCueIdentity'] == 14:
        return 0

    print('UNRECOGNISED SOUND CUE')
    return None

def distance_to_port(row, port):
    '''
    Euclidean distance between the trial's dot and one of the ports.

    row: trial record providing 'DotXLocation' and 'DotYLocation'.
    port: index into the module-level PORTS list of [x, y] pairs.
    '''
    port_xy = np.array(PORTS[port])
    dot_xy = np.array([row['DotXLocation'], row['DotYLocation']])
    dx, dy = port_xy - dot_xy
    return np.sqrt((dx**2) + (dy**2))

def get_last_rewarded(iloc, filtered_data):
    '''
    Find the most recent rewarded trial before position `iloc`.

    Rows iloc-1, iloc-2, ..., 0 of `filtered_data` are scanned in that
    order for a 'TrialCompletionCode' starting with 'Rewarded'. The
    last character of that code is returned (presumably the identifier
    of the rewarded choice - confirm against the code format).

    Raises ValueError when `iloc` is not in 1..len(filtered_data), or
    when none of the earlier trials was rewarded.
    '''
    if not 0 < iloc <= len(filtered_data):
        raise ValueError("Invalid iloc value")

    codes = filtered_data['TrialCompletionCode']
    # Walk backwards from the trial just before `iloc` to the first row.
    for earlier in range(iloc - 1, -1, -1):
        code = codes.iloc[earlier]
        if code.startswith('Rewarded'):
            return code[-1]

    raise ValueError("No previous 'Rewarded' TrialCompletionCode found")

def get_last(position, iloc, filtered_data):
    '''
    Return the choice made `position` trials before the trial at `iloc`.

    position: how many rows to look back (1 = the immediately preceding
        row of `filtered_data`).
    iloc: purely positional index of the current trial in `filtered_data`.
    filtered_data: trial table with a 'TrialCompletionCode' column.

    Returns the last character of the earlier trial's completion code.

    Raises IndexError when the look-back would start before the first
    row of the table.
    '''
    target = iloc - position
    if target < 0:
        # A negative positional index would not fail: pandas would wrap
        # around and hand back a row from the END of the table, i.e. a
        # future trial, silently corrupting the history predictor.
        raise IndexError(
            f"Cannot look back {position} trial(s) from position {iloc}"
        )

    past_row = filtered_data.iloc[target]
    return past_row['TrialCompletionCode'][-1]




In [None]:
def build_design_matrix(filtered_data):
    '''
    Assemble the per-trial predictor table and the vector of choices.

    The first 5 rows of `filtered_data` are skipped, so every remaining
    trial has 5 earlier rows to draw its choice history from.

    Returns
    -------
    design : pd.DataFrame
        One row per trial with columns trial, cue, distance_0,
        distance_1, last_rewarded and last_1 ... last_5.
    y : np.ndarray of float
        The last character of each trial's 'TrialCompletionCode'
        converted to float, i.e. the choice made on that trial.
    '''
    n_skipped = 5
    history_names = [f'last_{lag}' for lag in range(1, 6)]
    fixed_names = ['trial', 'cue', 'distance_0', 'distance_1', 'last_rewarded']
    columns = {name: [] for name in [*fixed_names, *history_names]}

    choices = []  # The actual choice in each trial

    # enumerate supplies the purely positional index (not the pandas
    # label) that the look-back helpers need.
    remaining_rows = filtered_data.iloc[n_skipped:].iterrows()
    for position, (_, row) in enumerate(remaining_rows, start=n_skipped):
        columns['trial'].append(row['TrialNumber'])
        columns['cue'].append(get_cue(row))
        columns['distance_0'].append(distance_to_port(row, 0))
        columns['distance_1'].append(distance_to_port(row, 1))
        columns['last_rewarded'].append(
            get_last_rewarded(position, filtered_data)
        )
        for lag, name in enumerate(history_names, start=1):
            columns[name].append(get_last(lag, position, filtered_data))

        choices.append(row['TrialCompletionCode'][-1])

    design = pd.DataFrame(columns)
    y = np.array(choices).astype(float)

    return design, y

# Build the per-trial predictor table (design) and the choice vector (y)
# for this session.
design,y = build_design_matrix(filtered_data)

In [None]:
# Inspect the single-session predictor table.
design

## Fitting their GLM object

- n: number of data/time points
- d: number of features (inputs to design matrix)
- c: number of classes (possible observations)
- x: design matrix (n x d)
- y: observations (n x c)
- w: weights mapping x to y (d x c or d x 1)

Therefore, our design matrix is timepoints x features

In [None]:
def build_GLM(design, y):
    '''
    Create a glmhmm GLM object sized for the given predictor table.

    The model gets n = number of rows of `design`, d = number of columns
    of `design` minus one (presumably to discount the 'trial' identifier
    column, which format_matrix does not include - confirm) and c = 2
    classes.

    `y` is accepted but not used.
    '''
    n_rows, n_columns = design.shape
    return glm.GLM(n_rows, n_columns - 1, 2)

# Instantiate the GLM for the single-session predictor table.
GLM  = build_GLM(design, y)

In [None]:
# Inspect the `observations` attribute of the freshly built GLM.
GLM.observations

In [None]:
# Starting weights for the fit, as produced by the GLM object itself.
w_init =  GLM.init_weights()

In [None]:
def format_matrix(design):
    '''
    Turn the predictor table into the float array handed to the GLM.

    Returns an array with one row per trial and nine columns, in this
    order: cue, last_1 ... last_5, last_rewarded, distance_0,
    distance_1. The 'trial' column of `design` is not included.
    '''
    column_order = [
        'cue',
        'last_1',
        'last_2',
        'last_3',
        'last_4',
        'last_5',
        'last_rewarded',
        'distance_0',
        'distance_1',
    ]
    # Stack the predictors as rows, then transpose to trials x predictors.
    stacked = np.array([design[name].tolist() for name in column_order])
    return stacked.T.astype(float)

# Convert the predictor table to the float array passed to GLM.fit, then
# check its shape.
X = format_matrix(design)
X.shape

In [None]:
# Shape of the choice vector; its length should equal the number of rows of X.
y.shape

In [None]:
# Shape of the initial weights, for comparison with X and y.
w_init.shape

In [None]:
# Fit the GLM on the single session; w is plotted below as the fitted weights.
# NOTE(review): the meaning of phi is defined by glmhmm - confirm before use.
w, phi  = GLM.fit(X, w_init, y)

In [None]:
# Predictor names, in the same order as the columns built by format_matrix.
xlabels = [
    'Cue identity',
    'History of last choice 1',
    'History of last choice 2',
    'History of last choice 3',
    'History of last choice 4',
    'History of last choice 5',
    'Last rewarded choice',
    'Distance to 0',
    'Distance to 1'
]
tick_positions = np.arange(len(xlabels))

fig, ax  = plt.subplots()
ax.plot(w)
# Dashed zero reference line, drawn against numeric x positions so the
# axis is not switched to categorical (string) units after the weights
# were already plotted numerically.
ax.plot(tick_positions, np.zeros(len(xlabels)), 'k--')
# Fix the tick positions first and only then label them; labelling
# before the positions are fixed can put labels on the wrong ticks.
ax.set_xticks(tick_positions)
ax.set_xticklabels(xlabels, rotation=90)

## Many sessions

Concatenate design matrices for many different sessions

In [None]:
def design_matrix_per_mouse(mouse, start_session = -10):
    '''
    Build one concatenated design matrix from several sessions of a mouse.

    Parameters
    ----------
    mouse :
        Mouse identifier, passed to tm.get_date_dirs and to
        tm.build_exp_data for every session.
    start_session : int
        Slice start applied to the session list
        (date_dirs[start_session:]); the default -10 keeps the last 10
        entries of whatever order tm.get_date_dirs returns.

    Returns
    -------
    X : np.ndarray
        Output of dm.format_matrix on the concatenated predictor table.
    y : np.ndarray
        Concatenated choice vectors of all sessions.
    row_identity : pd.DataFrame
        'date' and 'trial' of every row of X, to trace rows back to
        their session.
    design_concat : pd.DataFrame
        The concatenated predictor table itself.

    Raises
    ------
    ValueError
        If the slice selects no sessions.
    '''
    date_dirs = tm.get_date_dirs(mouse)[start_session:]
    if len(date_dirs) == 0:
        raise ValueError(f'No sessions selected for mouse {mouse}')

    design_list = []
    output_list = []
    date_list = []

    for date in date_dirs:
        print(date)
        # Load the session of the requested mouse. The `mouse` argument
        # must be used here, not a notebook-level constant, otherwise
        # the session list and the loaded data can belong to different
        # animals.
        exp_data = tm.build_exp_data(mouse, date)
        filtered_data = dm.filter_data(exp_data)
        design, y = dm.build_design_matrix(filtered_data)
        design_list.append(design)
        output_list.append(y)
        date_list.append([date]*len(design))

    design_concat = pd.concat(design_list, ignore_index=True)
    y = np.concatenate(output_list)
    date_concat = np.concatenate(date_list)

    X = dm.format_matrix(design_concat)
    # The concatenated table already carries the per-row trial numbers,
    # re-indexed 0..n-1, so no separate trial list is needed.
    row_identity = pd.DataFrame({'date': date_concat,
                                 'trial': design_concat['trial']})

    return X, y, row_identity, design_concat

# NOTE(review): this runs the version of the function shipped in
# FlexiVexi_glm.design_matrix (dm), not the definition in this cell.
# Presumably -19 is its start_session (the last 19 sessions) - confirm.
X, y, row_identity, design_concat = dm.design_matrix_per_mouse('FNT103', -19)

In [None]:
# Build and fit a GLM on the concatenated multi-session data; this
# overwrites the single-session GLM, w_init, w and phi from above.
GLM = dm.build_GLM(design_concat, y)
w_init =  GLM.init_weights()
w, phi  = GLM.fit(X, w_init, y)


In [None]:
# NOTE(review): neither variable is passed to the plot_model_weights call
# below, which uses the literal 'FNT103' and the default bias=True -
# confirm whether these two assignments are leftovers.
bias = True
mouse = 'FNT108'

def plot_model_weights(mouse, GLM, bias = True):
    '''
    Plot the weights stored on a GLM object, one x tick per predictor.

    Parameters
    ----------
    mouse :
        Mouse identifier; only used in the figure title.
    GLM :
        Model object providing `w` (the weights to plot) and `n` (shown
        in the title as the number of trials).
    bias : bool
        When True, a tenth tick labelled 'bias' is added after the nine
        predictor labels.

    Returns
    -------
    fig, ax : the matplotlib figure and axes that were created.
    '''
    xlabels = [
        'Cue identity',
        'History of last choice 1',
        'History of last choice 2',
        'History of last choice 3',
        'History of last choice 4',
        'History of last choice 5',
        'Last rewarded choice',
        'Distance to 0',
        'Distance to 1'
    ]
    if bias:
        xlabels.append('bias')
    tick_positions = np.arange(len(xlabels))

    fig, ax = plt.subplots()
    ax.set_facecolor('white')
    ax.plot(GLM.w)
    # Dashed zero reference line, drawn against numeric x positions so the
    # axis is not switched to categorical (string) units after the weights
    # were already plotted numerically.
    ax.plot(tick_positions, np.zeros(len(xlabels)), 'k--')
    # Fix the tick positions first and only then label them; labelling
    # before the positions are fixed can put labels on the wrong ticks.
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(xlabels, rotation=90)
    ax.set_ylabel('Weights')
    fig.suptitle(f'GLM weights for {mouse}, {GLM.n} trials')

    return fig, ax

# Plot the weights of the multi-session fit; the mouse label is given
# explicitly and bias keeps its default.
fig, ax = plot_model_weights('FNT103', GLM)


In [None]:
# Inspect the concatenated multi-session predictor table.
design_concat