In [None]:
import pandas as pd
import lecilab_behavior_analysis.utils as utils
import lecilab_behavior_analysis.plots as plots
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import lecilab_behavior_analysis.df_transforms as dft
from sklearn.linear_model import LogisticRegression
import seaborn as sns
%load_ext autoreload
%autoreload 2


single mouse

In [None]:
# Load the session data of a single mouse from the cluster.
# List the projects available on the server and keep the one analysed here.
tv_projects = utils.get_server_projects()
print(tv_projects)
# NOTE(review): index 1 is a positional pick from the list printed above;
# confirm it is still the intended project if the server contents change.
project_name = tv_projects[1]

# see the available animals
animals = utils.get_animals_in_project(project_name)
print(animals)

# download the data for a specific animal
mouse = "ACV007"
local_path = Path(utils.get_outpath()) / project_name / "sessions" / mouse
# create the directory if it doesn't exist
local_path.mkdir(parents=True, exist_ok=True)
# download the session data
utils.rsync_session_data(
    project_name=project_name,
    animal=mouse,
    local_path=str(local_path),
    credentials=utils.get_idibaps_cluster_credentials(),
)
# load the data: a semicolon-separated CSV named after the animal
df = pd.read_csv(local_path / f"{mouse}.csv", sep=";")

In [None]:
# reduce the dataset to the psychometric version of the task
# Otherwise, we would include a lot of "easy" trials that would bias the fit
# .copy() makes df_test an independent frame: later cells add columns to it,
# and writing into a boolean slice of df triggers SettingWithCopyWarning.
df_test = df[df["current_training_stage"] == "TwoAFC_visual_hard"].copy()

psychometric curve 

In [None]:
# Run the project transform for the ratio-based curve (presumably it adds the
# 'visual_stimulus_ratio' column used below; confirm in df_transforms), then
# plot left choices against the stimulus ratio.
df_test = dft.get_performance_by_difficulty_ratio(df_test)
plots.psychometric_plot_by_discreVal(
    df_test,
    x='visual_stimulus_ratio',
    y='left_choice',
)


In [None]:
# Run the project transform for the difference-based curve (presumably it adds
# the 'visual_stimulus_diff' column used below; confirm in df_transforms), then
# plot left choices against the stimulus difference.
df_test = dft.get_performance_by_difficulty_diff(df_test)
plots.psychometric_plot_by_continuVal(
    df_test,
    x='visual_stimulus_diff',
    y='left_choice',
)

GLM comparison

The following cell can be used to evaluate the model with cross-validation. It will be useful when comparing different models.

In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics import log_loss
from scipy.optimize import minimize
import numpy as np

# Define the lapse logistic function with independent lapses for left and right
def lapse_logistic_independent(params, x, y):
    """Negative log-likelihood of a logistic curve with independent lapses.

    The modelled probability of the outcome coded as ``y == 1`` is::

        lapse_left + (1 - lapse_left - lapse_right) / (1 + exp(-beta * (x - x0)))

    Parameters
    ----------
    params : sequence of four floats
        ``(lapse_left, lapse_right, beta, x0)``. Both lapse rates are clipped
        to ``[0, 0.5]`` before use.
    x : array-like
        Stimulus values.
    y : array-like
        Binary outcomes (0 or 1), one per entry of ``x``.

    Returns
    -------
    float
        Summed negative Bernoulli log-likelihood of ``y`` under the model.
    """
    lapse_left, lapse_right, beta, x0 = params
    # Ensure lapse rates are within [0, 0.5]
    lapse_left = np.clip(lapse_left, 0, 0.5)
    lapse_right = np.clip(lapse_right, 0, 0.5)

    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Predicted probabilities. A steep slope can overflow exp(); the result
    # (inf) still yields the correct limiting probability, so only the
    # warning is silenced.
    with np.errstate(over="ignore"):
        p_left = lapse_left + (1 - lapse_left - lapse_right) / (1 + np.exp(-beta * (x - x0)))

    # Keep probabilities strictly inside (0, 1): with zero lapse and a
    # saturated logistic, log(0) would make the likelihood NaN or infinite
    # and derail the optimizer.
    eps = 1e-12
    p_left = np.clip(p_left, eps, 1 - eps)

    # Negative log-likelihood
    return -np.sum(y * np.log(p_left) + (1 - y) * np.log(1 - p_left))

# Cross-validation setup: 5 shuffled folds with a fixed seed for reproducibility
kf = KFold(n_splits=5, shuffle=True, random_state=42)
log_losses = []

# Extract the regressor and the binary outcome once instead of once per fold.
# The outcome is cast to int, as the logistic-regression cells of this
# notebook also do, so that it matches labels=[0, 1] below.
stimulus_ratio = df_test['visual_stimulus_ratio'].values
left_choice = df_test['left_choice'].values.astype(int)

# Initial parameter guesses: [lapse_left, lapse_right, beta, x0]
initial_params = [0.05, 0.05, 1, 0]
# Lapse rates are bounded to [0, 0.5]; slope and PSE are unconstrained
param_bounds = [(0, 0.5), (0, 0.5), (None, None), (None, None)]

# Perform cross-validation
for train_index, test_index in kf.split(df_test):
    # Split the data
    x_train, x_test = stimulus_ratio[train_index], stimulus_ratio[test_index]
    y_train, y_test = left_choice[train_index], left_choice[test_index]

    # Fit the model on the training data
    result = minimize(
        lapse_logistic_independent,
        initial_params,
        args=(x_train, y_train),
        bounds=param_bounds,
    )

    # Extract fitted parameters
    lapse_left, lapse_right, beta, x0 = result.x

    # Generate predictions on the test data
    p_left_test = lapse_left + (1 - lapse_left - lapse_right) / (1 + np.exp(-beta * (x_test - x0)))

    # Calculate log loss for the test data. labels=[0, 1] keeps the metric
    # defined when a test fold happens to contain only one kind of choice;
    # without it log_loss raises because it cannot infer both classes.
    loss = log_loss(y_test, p_left_test, labels=[0, 1])
    log_losses.append(loss)

# Print cross-validation results
print(f"Cross-Validation Log Losses: {log_losses}")
print(f"Mean Log Loss: {np.mean(log_losses)}")
# np.std is a standard deviation; the previous label said "Standard ratio"
print(f"Standard deviation of Log Loss: {np.std(log_losses)}")

weight and stats for the different predictors:
- visual stimulus ratio (you call it deviation)
- visual stimulus diff. Nuo: change to "total intensity on left port"
- port where the animal is coming from
- interactions
- Nuo: add another regressor: the previous correct choice

We can play around with these things.

In [None]:
# Derive the regressor columns with the project helper, then fit 'left_choice'
# on the predictors listed below. Both helpers live in
# lecilab_behavior_analysis; their internals are not shown in this notebook.
df_new_for_fit = dft.parameters_for_fit(df_test)
results = utils.logi_model_fit(
    df_new_for_fit,
    X=[
        'abs_visual_stimulus_ratio',
        'previous_port_before_stimulus_numeric',
        'visual_ratio_diff_interact',
        'previous_choice_left_correct',
        'previous_choice_right_wrong',
        'previous_choice_left',
        'visual_ratio_bright_interact',
    ],
    y='left_choice',
)

correct choice as output

In [None]:
# Same fitting helper as for the left-choice model, but with 'correct_numeric'
# as the outcome. The commented-out entries are alternative predictors that
# are currently excluded from the fit.
df_new_for_fit = dft.parameters_for_fit(df_test)
results = utils.logi_model_fit(
    df_new_for_fit,
    X=[
        'abs_visual_stimulus_ratio',
        'wrong_bright',
        # 'wrong_bright_zscore',
        'same_choice_correctPre',
        # 'diff_choice_wrongPre',
        'same_choice_previous',
        'previous_correct',
        # 'previous_choice_same_correct'
    ],
    y='correct_numeric',
)
print(results)

Psychometric curves conditioned on the previous trial (correct vs. wrong)

In [None]:
# Psychometric curves conditioned on the previous trial:
# one panel per previous choice side, one curve per previous outcome.
fig, ax = plt.subplots(1, 2, figsize=(10, 5))

# The colour is looked up by the 'previous_correct' value itself, so a missing
# group cannot shift the red/green assignment (pairing groups with colours by
# position would paint a lone "True" group red).
outcome_colors = {False: 'red', True: 'green'}
panels = [('left', "Left Choice Previous"), ('right', "Right Choice Previous")]

for panel_ax, (previous_side, panel_title) in zip(ax, panels):
    df_side = df_test[df_test['previous_choice'] == previous_side]
    for was_correct, df_group in df_side.groupby('previous_correct'):
        if was_correct not in outcome_colors:
            # only the two boolean outcomes have a colour assigned
            continue
        plots.psychometric_plot_by_discreVal(
            df=df_group,
            x='visual_stimulus_ratio',
            y='left_choice',
            ax=panel_ax,
            markercolor='k',
            markers='o',
            errorbar=("ci", 95),
            markerlabel=None,
            markersize=5,
            linecolor=outcome_colors[was_correct],
            linelabel='previous ' + str(was_correct),
        )
    panel_ax.legend()
    panel_ax.set_title(panel_title)

In [None]:
# Psychometric curves conditioned on the previous trial:
# one panel per previous outcome, one curve per previous choice side.
fig, ax = plt.subplots(1, 2, figsize=(10, 5))

# The colour is looked up by the 'previous_choice' value itself, so an extra
# or missing group cannot shift which side gets which colour.
choice_colors = {'left': 'gold', 'right': 'lightskyblue'}
panels = [(True, "Correct Choice Previous"), (False, "Incorrect Choice Previous")]

for panel_ax, (was_correct, panel_title) in zip(ax, panels):
    df_outcome = df_test[df_test['previous_correct'] == was_correct]
    for previous_side, df_group in df_outcome.groupby('previous_choice'):
        if previous_side not in choice_colors:
            # only left/right choices have a colour assigned
            continue
        plots.psychometric_plot_by_discreVal(
            df=df_group,
            x='visual_stimulus_ratio',
            y='left_choice',
            ax=panel_ax,
            markercolor='k',
            markers='o',
            errorbar=("ci", 95),
            markerlabel=None,
            markersize=5,
            linecolor=choice_colors[previous_side],
            linelabel='previous ' + str(previous_side),
        )
    panel_ax.legend()
    panel_ax.set_title(panel_title)

Matrix format

In [None]:
# let's use the absolute value of the lowest visual stimulus as a proxy for the brightness of the visual stimulus
def _lowest_stimulus_abs(raw):
    """Return the absolute value of the smaller of the first two stimulus entries.

    ``raw`` is the text stored in the 'visual_stimulus' column; it is parsed
    only once per row.
    """
    # NOTE(review): eval() executes whatever text the CSV contains. It is kept
    # because the exact serialisation of this column is not visible here;
    # consider ast.literal_eval if the values are plain Python literals.
    pair = eval(raw)
    first, second = pair[0], pair[1]
    return abs(first) if first < second else abs(second)


df_test['visual_stimulus_lowest'] = df_test['visual_stimulus'].apply(_lowest_stimulus_abs)

# create 10 bins for the absolute value of the lowest visual stimulus
min_value = df_test['visual_stimulus_lowest'].min()
max_value = df_test['visual_stimulus_lowest'].max()
bins = np.linspace(min_value, max_value, 11)
# include_lowest=True closes the first bin on its left edge. Without it the
# trials sitting exactly at min_value fall outside every bin and become NaN.
df_test['visual_stimulus_lowest_binned'] = pd.cut(
    df_test['visual_stimulus_lowest'],
    bins=bins,
    labels=[f"{b:.2f}" for b in bins[:-1]],
    include_lowest=True,
)

# create a pivot table with the visual stimulus ratio and absolute value of the lowest visual stimulus
pivot_table_abs = df_test.pivot_table(
    index='visual_stimulus_lowest_binned',
    columns='visual_stimulus_ratio',
    values='left_choice',
    aggfunc='mean',
    observed=True
)

# plot the heatmap on an explicit axes
fig, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    pivot_table_abs,
    cmap='coolwarm',
    annot=True,
    fmt=".2f",
    cbar_kws={'label': 'Probability of Left Choice'},
    ax=ax,
)
ax.set_xlabel("Visual Stimulus ratio")
ax.set_ylabel("Absolute Value of Lowest Visual Stimulus")
ax.set_title("Heatmap of Probability of Left Choice")
# rotate the tick labels
plt.setp(ax.get_yticklabels(), rotation=0)
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.show()

In [None]:
# transform visual_stimulus_lowest_binned to a numeric value for plotting
df_test['visual_stimulus_lowest_binned_num'] = pd.to_numeric(df_test['visual_stimulus_lowest_binned'], errors='coerce')

# make two plots, one for when the animal comes from the left and one for when it comes from the right
fig, axs = plt.subplots(1, 2, figsize=(12, 5), sharey=True)
for ax, side in zip(axs.ravel(), ['left', 'right']):
    df_side = df_test[df_test['previous_port_before_stimulus'] == side]
    # one logistic curve per stimulus ratio, with the binned brightness as the only regressor
    for ratio, df_ratio in df_side.groupby('visual_stimulus_ratio'):
        df_ratio = df_ratio.sort_values(by='visual_stimulus_lowest_binned_num')
        # drop nan
        df_ratio = df_ratio.dropna(subset=['visual_stimulus_lowest_binned_num'])
        X = df_ratio['visual_stimulus_lowest_binned_num'].values.reshape(-1, 1)
        y = df_ratio['left_choice'].values.astype(int)
        # LogisticRegression cannot be fitted on an empty group or on one in
        # which every choice is the same; skip it instead of aborting the
        # whole figure.
        if np.unique(y).size < 2:
            continue
        model = LogisticRegression()
        model.fit(X, y)
        # probability of the positive class (left choice)
        y_prob = model.predict_proba(X)[:, 1]
        ax.plot(X, y_prob, label=f"Visual Stimulus ratio: {ratio}")
    ax.set_xlabel("Absolute Value of Lowest Visual Stimulus")
    ax.set_ylabel("Probability of Left Choice")
    ax.legend()
    ax.set_title(f"Last Choice Before Stimulus: {side.capitalize()}")
plt.show()

Fit the lapse model independently considering previous choices

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(5, 5))

colors = ["blue", "orange"]

# Defined in this cell rather than inherited from the cross-validation cell,
# where it only exists as a variable assigned inside a for-loop body.
# Initial parameter guesses: [lapse_left, lapse_right, beta, x0]
initial_params = [0.05, 0.05, 1, 0]
# Lapse rates are bounded to [0, 0.5]; slope and PSE are unconstrained
param_bounds = [(0, 0.5), (0, 0.5), (None, None), (None, None)]

for color, side in zip(colors, ['left', 'right']):
    df_side = df_test[df_test['previous_port_before_stimulus'] == side]
    # Fit the model
    x = df_side['visual_stimulus_ratio'].values
    y = df_side['left_choice'].values
    result = minimize(
        lapse_logistic_independent,
        initial_params,
        args=(x, y),
        bounds=param_bounds,
    )

    # Extract fitted parameters
    lapse_left, lapse_right, beta, x0 = result.x
    print(f"Side: {side}, Lapse Left: {lapse_left}, Lapse Right: {lapse_right}, Slope (Beta): {beta}, PSE (x0): {x0}")

    # Evaluate the fitted curve on a dense grid spanning the observed ratios
    xs = np.linspace(df_side['visual_stimulus_ratio'].min(), df_side['visual_stimulus_ratio'].max(), 100)
    p_left = lapse_left + (1 - lapse_left - lapse_right) / (1 + np.exp(-beta * (xs - x0)))

    # Observed mean left choice per ratio with 95% CI (markers only, no joining line)
    sns.pointplot(
        x='visual_stimulus_ratio',
        y='left_choice',
        data=df_side,
        estimator=np.mean,
        color=color,
        markers='o',
        errorbar=("ci", 95),
        ax=ax,
        label=f'Choices when coming from {side}',
        native_scale=True,
        linestyles='',
    )
    # Plot the fitted curve
    ax.plot(xs, p_left, color=color, label='Lapse Logistic Fit')

# Axis decoration is the same for both sides, so it is applied once after the loop
ax.set_xlabel("Visual Stimulus ratio")
ax.set_ylabel("Probability of Left Choice")
ax.set_title("Psychometric Curves")
ax.legend()
plt.show()

I kept what you did for comparison here

In [None]:
# It is interesting to compare the effects of the relative difference between the two visual stimuli,
# and the absolute difference between them.

# Maybe what we can do is to train another logistic regression model, adding as well the absolute difference
# between the two visual stimuli, and see how it affects the probability of a left choice.
# Do you know what I mean?

fig, ax = plt.subplots()
# one logistic curve per stimulus ratio, with the stimulus difference as the only regressor
for ratio, df_ratio in df_test.groupby('visual_stimulus_ratio'):
    df_ratio = df_ratio.sort_values(by='visual_stimulus_diff')
    X = df_ratio['visual_stimulus_diff'].values.reshape(-1, 1)
    y = df_ratio['left_choice'].values.astype(int)
    # LogisticRegression cannot be fitted when every choice in the group is
    # the same; skip such a ratio instead of aborting the whole cell.
    if np.unique(y).size < 2:
        continue
    model = LogisticRegression()
    model.fit(X, y)
    # probability of the positive class (left choice)
    y_prob = model.predict_proba(X)[:, 1]
    ax.plot(X, y_prob, label=f"Visual Stimulus ratio: {ratio}")
# the legend is built once, after all curves are drawn
ax.legend()
ax.set_xlabel("Visual Stimulus Difference")
ax.set_ylabel("Probability of Left Choice")
plt.show()

Multiple animals analysis


In [None]:
# Download and load the session CSV of every animal in the project.
# NOTE(review): index 1 is a positional pick from the server project list;
# confirm it is still the intended project.
multi_project = tv_projects[1]
# The sessions root is the same for every animal, so it is built once
sessions_root = Path(utils.get_outpath()) / multi_project / "sessions"

df_dic = {}
# `animal` / `animal_dir` are used as loop names so the single-mouse `mouse`
# and `local_path` set earlier in the notebook are not overwritten.
for animal in animals:
    animal_dir = sessions_root / animal
    # create the directory if it doesn't exist
    animal_dir.mkdir(parents=True, exist_ok=True)
    # download the session data
    utils.rsync_session_data(
        project_name=multi_project,
        animal=animal,
        local_path=str(animal_dir),
        credentials=utils.get_idibaps_cluster_credentials(),
    )
    # load the data: a semicolon-separated CSV named after the animal
    df_dic[animal] = pd.read_csv(animal_dir / f"{animal}.csv", sep=";")

In [None]:
# Keep, for every animal that reached the 'TwoAFC_visual_hard' stage, only the
# trials of that stage, and add the signed ratio/difference regressors and the
# binary left-choice outcome.
df_dic_hard = {}
# The loop names are deliberately not `df`, so the single-mouse frame loaded
# at the top of the notebook is not overwritten by the last animal.
for animal_name, animal_df in df_dic.items():
    if 'TwoAFC_visual_hard' not in animal_df["current_training_stage"].unique():
        continue

    hard_df = animal_df.dropna(subset=['visual_stimulus'])
    # .copy(): columns are added below, and writing into a filtered slice
    # triggers SettingWithCopyWarning.
    hard_df = hard_df[hard_df["current_training_stage"] == "TwoAFC_visual_hard"].copy()

    # Parse every stringified stimulus pair once instead of on each access.
    # NOTE(review): eval() executes whatever text the CSV contains. It is kept
    # because the exact serialisation of this column is not visible here;
    # consider ast.literal_eval if the values are plain Python literals.
    stimulus_pairs = [eval(raw) for raw in hard_df['visual_stimulus']]
    # +1 when the correct side is left, -1 otherwise
    side_sign = np.where(hard_df['correct_side'] == 'left', 1, -1)

    # |first / second| rounded to 4 decimals, signed by the correct side
    ratio_abs = np.array(
        [abs(round(pair[0] / pair[1], 4)) for pair in stimulus_pairs], dtype=float
    )
    hard_df['visual_stimulus_ratio'] = ratio_abs * side_sign

    # |first - second|, signed by the correct side
    diff_abs = np.array(
        [abs(pair[0] - pair[1]) for pair in stimulus_pairs], dtype=float
    )
    hard_df['visual_stimulus_diff'] = diff_abs * side_sign
    # floor-divide into steps of 0.1
    hard_df["visual_stimulus_diff_binned"] = hard_df['visual_stimulus_diff'] // 0.1

    hard_df = dft.add_mouse_first_choice(hard_df)
    # 1 when the first choice was left, 0 otherwise
    hard_df['left_choice'] = (hard_df['first_choice'] == 'left').astype(int)

    df_dic_hard[animal_name] = hard_df

In [None]:
# For every animal in df_dic_hard, fit a logistic regression of 'left_choice'
# on the stimulus ratio, the binned stimulus difference and an interaction
# term, and collect p-values and coefficients with one column per animal.
# NOTE(review): neither `sm` nor `interaction_calc` is imported or defined in
# the visible part of this notebook. `sm` looks like statsmodels.api - confirm
# and add the import and the function definition, otherwise this cell raises
# NameError on a fresh kernel.
# NOTE(review): the loop variable `df` rebinds the notebook-level `df`, and the
# 'interaction_term' column is written into the frames stored in df_dic_hard.
df_ratio_diffBin_inter_p = pd.DataFrame()
df_ratio_diffBin_inter_coef = pd.DataFrame()
for df_name, df in zip(df_dic_hard.keys(), df_dic_hard.values()):
    # interaction_calc is applied row by row (axis=1)
    df['interaction_term'] = df.apply(interaction_calc, axis=1)
    # Prepare the independent variables
    X_multi = df[['visual_stimulus_ratio', 'visual_stimulus_diff_binned', 'interaction_term']]
    X_multi_const = sm.add_constant(X_multi)
    y = df['left_choice'].values.astype(int)

    # Fit the logistic regression model with multiple regressors
    logit_model_multi = sm.Logit(y, X_multi_const).fit()

    # one column per animal, indexed by regressor name
    df_ratio_diffBin_inter_p[df_name] = logit_model_multi.pvalues
    df_ratio_diffBin_inter_coef[df_name] = logit_model_multi.params

In [None]:
# Same per-animal logistic regression as the binned-difference variant, but
# using the unbinned 'visual_stimulus_diff' as the second regressor.
# NOTE(review): neither `sm` nor `interaction_calc` is imported or defined in
# the visible part of this notebook. `sm` looks like statsmodels.api - confirm
# and add the import and the function definition, otherwise this cell raises
# NameError on a fresh kernel.
# NOTE(review): the loop variable `df` rebinds the notebook-level `df`, and the
# 'interaction_term' column is written into the frames stored in df_dic_hard.
df_ratio_diff_inter_p = pd.DataFrame()
df_ratio_diff_inter_coef = pd.DataFrame()
for df_name, df in zip(df_dic_hard.keys(), df_dic_hard.values()):
    # interaction_calc is applied row by row (axis=1)
    df['interaction_term'] = df.apply(interaction_calc, axis=1)
    # Prepare the independent variables
    X_multi = df[['visual_stimulus_ratio', 'visual_stimulus_diff', 'interaction_term']]
    X_multi_const = sm.add_constant(X_multi)
    y = df['left_choice'].values.astype(int)

    # Fit the logistic regression model with multiple regressors
    logit_model_multi = sm.Logit(y, X_multi_const).fit()

    # one column per animal, indexed by regressor name
    df_ratio_diff_inter_p[df_name] = logit_model_multi.pvalues
    df_ratio_diff_inter_coef[df_name] = logit_model_multi.params

In [None]:
# Shorten the regressor names on the index so that both models share the same
# x positions. Each table only contains one of the two 'diff' keys; keys that
# are absent from a table are ignored by rename.
short_names = {
    'visual_stimulus_ratio': 'ratio',
    'visual_stimulus_diff_binned': 'diff',
    'visual_stimulus_diff': 'diff',
    'interaction_term': 'inter',
}
for p_table in (df_ratio_diffBin_inter_p, df_ratio_diff_inter_p):
    p_table.rename(index=short_names, inplace=True)

# One colour per animal: solid line for the raw-difference model, dashed line
# for the binned-difference model.
palette = sns.color_palette("colorblind", len(df_dic_hard))
for df_name, color in zip(df_dic_hard, palette):
    plt.plot(
        df_ratio_diff_inter_p[df_name],
        color=color,
        label=df_name + 'ratio_diff_inter',
    )
    plt.plot(
        df_ratio_diffBin_inter_p[df_name],
        color=color,
        linestyle='--',
        label=df_name + 'ratio_diffBin_inter',
    )

# reference line at p = 0.05
plt.axhline(y=0.05, color='k', linestyle='--', label='p-value threshold')
plt.xlabel("Regressors")
plt.ylabel("p-value")
plt.legend(loc=(1, 0))

In [None]:
# Shorten the regressor names on the index so that both models share the same
# x positions. Each table only contains one of the two 'diff' keys; keys that
# are absent from a table are ignored by rename.
short_names = {
    'visual_stimulus_ratio': 'ratio',
    'visual_stimulus_diff_binned': 'diff',
    'visual_stimulus_diff': 'diff',
    'interaction_term': 'inter',
}
for coef_table in (df_ratio_diffBin_inter_coef, df_ratio_diff_inter_coef):
    coef_table.rename(index=short_names, inplace=True)

# One colour per animal: solid line for the raw-difference model, dashed line
# for the binned-difference model.
palette = sns.color_palette("colorblind", len(df_dic_hard))
for df_name, color in zip(df_dic_hard, palette):
    plt.plot(
        df_ratio_diff_inter_coef[df_name],
        color=color,
        label=df_name + 'ratio_diff_inter',
    )
    plt.plot(
        df_ratio_diffBin_inter_coef[df_name],
        color=color,
        linestyle='--',
        label=df_name + 'ratio_diffBin_inter',
    )

plt.xlabel("Regressors")
plt.ylabel("Coefficient")
plt.legend(loc=(1, 0))