In [None]:
import ast
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression

import lecilab_behavior_analysis.df_transforms as dft
import lecilab_behavior_analysis.plots as plots
import lecilab_behavior_analysis.utils as utils
%load_ext autoreload
%autoreload 2

single mouse

In [None]:
# Ask the cluster which projects are available
tv_projects = utils.get_server_projects()
print(tv_projects)
# List the animals of the second project in that list
animals = utils.get_animals_in_project(tv_projects[1])
print(animals)
# Choose the animal and build the local folder that will hold its sessions
mouse = "ACV007"
local_path = Path(utils.get_outpath()) / tv_projects[1] / "sessions" / mouse
# Create the folder (and any missing parents) before syncing into it
local_path.mkdir(exist_ok=True, parents=True)
# Copy the session data of this animal from the cluster
utils.rsync_session_data(
    project_name=tv_projects[1],
    animal=mouse,
    local_path=str(local_path),
    credentials=utils.get_idibaps_cluster_credentials(),
)
# Read the per-animal CSV (semicolon separated)
df = pd.read_csv(local_path / f"{mouse}.csv", sep=";")

In [None]:
# Reduce the dataset to the psychometric version of the task.
# Otherwise, we would include a lot of "easy" trials that would bias the fit.
# .copy() makes df_test an independent frame, so the column assignments done in
# later cells do not raise SettingWithCopyWarning on a slice of df.
df_test = df[df["current_training_stage"] == "TwoAFC_visual_hard"].copy()

psychometric curve 

In [None]:
# NOTE(review): the plot needs 'visual_stimulus_ratio' and 'left_choice'; in this
# notebook those columns are created in later cells unless the dft helper adds
# them itself -- confirm the intended run order.
df_test = dft.get_performance_by_difficulty_ratio(df_test)
plots.psychometric_plot(
    df_test,
    x='visual_stimulus_ratio',
    y='left_choice',
)
plt.show()

In [None]:
# Example plot with log-scaled y-axis: empirical log odds of a left choice per ratio
sns.pointplot(
    x='visual_stimulus_ratio',
    y='left_choice',
    data=df_test,
    estimator=lambda x: np.log(np.mean(x) / (1 - np.mean(x))),  # Log odds transformation
    color='blue',
    markers='o',
    errorbar=("ci", 95),
    native_scale=True,
    linestyles='',
)

# Log odds of a left choice for every distinct stimulus ratio
X = df_test['visual_stimulus_ratio'].unique()
log_odds = []
for x in X:
    p = np.float64(df_test[df_test['visual_stimulus_ratio'] == x]['left_choice'].mean())
    # p == 0 or p == 1 gives -inf / +inf; those points are masked out before the fit
    with np.errstate(divide='ignore', invalid='ignore'):
        log_odds.append(np.log(p / (1 - p)))

# Fit a straight line using only the points with finite coordinates;
# np.polyfit cannot handle inf/NaN values.
fit_mask = np.isfinite(X) & np.isfinite(log_odds)
model = np.polyfit(X[fit_mask], np.asarray(log_odds)[fit_mask], 1)
xs = np.linspace(df_test['visual_stimulus_ratio'].min(), df_test['visual_stimulus_ratio'].max(), 100).reshape(-1, 1)
predicted_log_odds = model[0] * xs + model[1]

plt.plot(xs, predicted_log_odds, color='red', label='Predicted Log Odds')
plt.legend()

plt.xlabel("Visual Stimulus Ratio")
plt.ylabel("Log Odds of Left Choice")
plt.title("Psychometric Curve with Log Odds")

plt.show()

GLM comparison

In [None]:
# NOTE(review): parameters_for_fit is defined in a later cell of this notebook,
# so this cell only works once that cell has been run.
utils.column_checker(df_test, required_columns=set(parameters_for_fit))

In [None]:
import statsmodels.api as sm

parameters_for_fit = ['visual_stimulus_ratio',
                      'previous_port_before_stimulus_numeric',
                      'interaction_term',
                      'previous_choice_left_correct',
                      'previous_choice_right_wrong',
                      'previous_choice_left', 
                      'interaction_term_brightness'
                    ]
variable_for_prediction = 'left_choice'

# Drop the trials where any regressor or the outcome is missing
df_for_fit = df_test.dropna(subset=parameters_for_fit + [variable_for_prediction])

# Prepare the independent variables.
# The regressors are cast to float (not int): some columns are stored as object,
# and an integer cast would truncate the continuous ones such as the log ratio.
# df_for_fit keeps all its columns so the outcome can still be read from it below.
X_multi = df_for_fit[parameters_for_fit].astype(float).values
X_multi_const = sm.add_constant(X_multi)
y = df_for_fit[variable_for_prediction].values.astype(int)

# Fit the logistic regression model with multiple regressors
logit_model_multi = sm.Logit(y, X_multi_const).fit()

# Display the summary, which includes p-values for all regressors
print(logit_model_multi.summary(xname= ["intercept"] + parameters_for_fit))

In [None]:
# Wrap every regressor name in single quotes and display the resulting list
parameters_ = list(map("'{}'".format, parameters_for_fit))
parameters_

calculate the "evidences" and the choices

In [None]:
# Parse the "visual_stimulus" strings once per trial. literal_eval only accepts
# Python literals, so text coming from the CSV cannot execute code as eval() could.
stimulus_pairs = df_test['visual_stimulus'].apply(ast.literal_eval)
left_is_correct = df_test['correct_side'] == 'left'

# Evidence: log of the absolute ratio between the two intensities,
# rounded to 4 decimals so it is easier to read
log_abs_ratio = stimulus_pairs.apply(lambda pair: round(np.log(abs(pair[0] / pair[1])), 4))
# The sign is set by the correct side (kept when left is correct, flipped otherwise).
# This was good in order to make the fit work for both left and right choices!
df_test['visual_stimulus_ratio'] = np.where(left_is_correct, log_abs_ratio, -log_abs_ratio)

# Same sign convention for the absolute difference between the two intensities
abs_diff = stimulus_pairs.apply(lambda pair: abs(pair[0] - pair[1]))
df_test['visual_stimulus_diff'] = np.where(left_is_correct, abs_diff, -abs_diff)

# !!!!! This introduces a bug!! What would happen on the trials where the mouse has to go right? Which value would be used then?
# df_test['left_choice'] = np.where((df_test['correct_side'] == 'left') & (df_test['correct'] == True), 1, 0)

# What you want is a value that goes from 0 to 1, indicating the probability of a left choice.
# For these fits we really don't care about the correct side, we just want to know if the mouse chose left or right.

# Earlier versions inferred the probability of a left choice back from the performance
# and the trial difficulty. Using the first choice directly is simpler and less confusing.

# dft.add_mouse_first_choice creates the column "first_choice" ("left" or "right" per trial)
df_test = dft.add_mouse_first_choice(df_test)

# Transform it to 0 and 1, where 1 is left and 0 is anything else
df_test['left_choice'] = df_test['first_choice'].apply(lambda x: 1 if x == 'left' else 0)

# The column names are provisional, so that the different solutions can be compared.
# Once we have what we need, we should clean up the code and use more meaningful names.

In [None]:
def get_evidence_ratio(df):
    """Add the signed log intensity ratio of every trial as 'visual_stimulus_ratio'.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'visual_stimulus' column holding the string form of a pair of
        numbers, and a 'correct_side' column.

    Returns
    -------
    pandas.DataFrame
        The same frame (modified in place) with 'visual_stimulus_ratio' set to
        log(|first / second|) rounded to 4 decimals, kept as is when
        'correct_side' is 'left' and negated otherwise.

    Raises
    ------
    ValueError, SyntaxError
        If a 'visual_stimulus' entry is not a plain Python literal.
    """
    # literal_eval only parses literals; eval() would run any expression stored in the column
    stimulus_pairs = df['visual_stimulus'].apply(ast.literal_eval)
    # Parse once per row, then take the log of the absolute ratio (4 decimals for readability)
    log_abs_ratio = stimulus_pairs.apply(lambda pair: round(np.log(abs(pair[0] / pair[1])), 4))
    # Sign by correct side so that one fit covers both left and right trials
    df['visual_stimulus_ratio'] = np.where(df['correct_side'] == 'left', log_abs_ratio, -log_abs_ratio)
    return df

def get_left_choice(df):
    """Return df with a binary 'left_choice' column (1 = first choice was 'left', else 0).

    The 'first_choice' column is taken from the frame returned by
    dft.add_mouse_first_choice.
    """
    with_first_choice = dft.add_mouse_first_choice(df)
    with_first_choice['left_choice'] = [
        1 if side == 'left' else 0 for side in with_first_choice['first_choice']
    ]
    return with_first_choice

# Add the signed evidence first, then the binary left-choice column
df_test = get_left_choice(get_evidence_ratio(df_test))

In [None]:
# Fit a one-regressor logistic regression: left choice as a function of the stimulus ratio
X = df_test['visual_stimulus_ratio'].values.reshape(-1, 1)
y = df_test['left_choice'].values.astype(int)
model = LogisticRegression()
model.fit(X, y)

# The fitted model gives the probability of a left choice for ANY ratio,
# so evaluate it on an evenly spaced grid spanning the observed range
ratio_column = df_test['visual_stimulus_ratio']
xs = np.linspace(ratio_column.min(), ratio_column.max(), 100).reshape(-1, 1)
y_prob = model.predict_proba(xs)[:, 1]

# Observed choices: mean left choice per ratio with a 95% confidence interval
fig, ax = plt.subplots(figsize=(5, 5))
sns.pointplot(
    data=df_test,
    x='visual_stimulus_ratio',
    y='left_choice',
    estimator=lambda choices: np.mean(choices),
    errorbar=("ci", 95),
    native_scale=True,
    color='blue',
    markers='o',
    linestyles='',
    label='Observed Choices',
    ax=ax,
)

# Fitted curve on top of the observed points
ax.plot(xs, y_prob, color='red', label='Logistic Regression Fit')
ax.set_xlabel("Visual Stimulus ratio")
ax.set_ylabel("Probability of Left Choice")
ax.set_ylim(0, 1)
ax.set_title("Psychometric Curve")
ax.legend()
plt.show()



In [None]:
# testing a model with lapses
from scipy.optimize import minimize
import numpy as np

# Define the lapse logistic function with independent lapses for left and right
def lapse_logistic_independent(params, x, y):
    """Negative log-likelihood of a logistic curve with two independent lapse rates.

    The modelled probability of a left choice is
    lapse_left + (1 - lapse_left - lapse_right) / (1 + exp(-beta * (x - x0))),
    so lapse_left is the lower asymptote and 1 - lapse_right the upper one.

    Parameters
    ----------
    params : sequence of 4 floats
        (lapse_left, lapse_right, beta, x0); both lapse rates are clipped to [0, 0.5].
    x : array-like
        Stimulus values, one per trial.
    y : array-like
        Binary choices (1 = left), one per trial.

    Returns
    -------
    float
        Negative log-likelihood of y under the model; always finite for finite inputs.
    """
    lapse_left, lapse_right, beta, x0 = params
    # Ensure lapse rates are within [0, 0.5]
    lapse_left = np.clip(lapse_left, 0, 0.5)
    lapse_right = np.clip(lapse_right, 0, 0.5)
    # exp() may overflow to inf for extreme slopes; the resulting probability is still valid
    with np.errstate(over='ignore'):
        p_left = lapse_left + (1 - lapse_left - lapse_right) / (1 + np.exp(-beta * (x - x0)))
    # Keep probabilities strictly inside (0, 1): with a lapse of 0 a saturated curve
    # would otherwise give log(0) = -inf and 0 * -inf = nan, breaking the optimiser
    eps = np.finfo(float).eps
    p_left = np.clip(p_left, eps, 1 - eps)
    # Negative log-likelihood
    nll = -np.sum(y * np.log(p_left) + (1 - y) * np.log(1 - p_left))
    return nll

# Initial parameter guesses: [lapse_left, lapse_right, beta, x0]
initial_params = [0.05, 0.05, 1, 0]

# Fit the model on the signed stimulus ratio
x = df_test['visual_stimulus_ratio'].values
y = df_test['left_choice'].values
result = minimize(
    lapse_logistic_independent,
    initial_params,
    args=(x, y),
    bounds=[(0, 0.5), (0, 0.5), (None, None), (None, None)]
)
# Make a failed optimisation visible instead of silently plotting its parameters
if not result.success:
    print(f"Warning: lapse model fit did not converge: {result.message}")

# Extract fitted parameters
lapse_left, lapse_right, beta, x0 = result.x
print(f"Lapse Left: {lapse_left}, Lapse Right: {lapse_right}, Slope (Beta): {beta}, PSE (x0): {x0}")

# Generate predictions over the range of the data fitted in THIS cell
# (x, not the X left behind by an earlier cell)
xs = np.linspace(x.min(), x.max(), 100)
p_left = lapse_left + (1 - lapse_left - lapse_right) / (1 + np.exp(-beta * (xs - x0)))

# Plot the observed choices and the fitted curve
fig, ax = plt.subplots(figsize=(5, 5))
sns.pointplot(
    x='visual_stimulus_ratio',
    y='left_choice',
    data=df_test,
    estimator=lambda x: np.mean(x),
    color='blue',
    markers='o',
    errorbar=("ci", 95),
    ax=ax,
    label='Observed Choices',
    native_scale=True,
    linestyles='',
)
ax.plot(xs, p_left, color='red', label='Lapse Logistic Fit (Independent)')
ax.set_xlabel("Visual Stimulus ratio")
ax.set_ylabel("Probability of Left Choice")
ax.set_ylim(0, 1)
plt.title("Psychometric Curve with Independent Lapses")
plt.legend()
plt.show()

The following cell can be used to evaluate the model. It will be useful when comparing different models.

In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics import log_loss
from scipy.optimize import minimize
import numpy as np

# Define the lapse logistic function with independent lapses for left and right
def lapse_logistic_independent(params, x, y):
    """Negative log-likelihood of a logistic curve with two independent lapse rates.

    The modelled probability of a left choice is
    lapse_left + (1 - lapse_left - lapse_right) / (1 + exp(-beta * (x - x0))),
    so lapse_left is the lower asymptote and 1 - lapse_right the upper one.

    Parameters
    ----------
    params : sequence of 4 floats
        (lapse_left, lapse_right, beta, x0); both lapse rates are clipped to [0, 0.5].
    x : array-like
        Stimulus values, one per trial.
    y : array-like
        Binary choices (1 = left), one per trial.

    Returns
    -------
    float
        Negative log-likelihood of y under the model; always finite for finite inputs.
    """
    lapse_left, lapse_right, beta, x0 = params
    # Ensure lapse rates are within [0, 0.5]
    lapse_left = np.clip(lapse_left, 0, 0.5)
    lapse_right = np.clip(lapse_right, 0, 0.5)
    # exp() may overflow to inf for extreme slopes; the resulting probability is still valid
    with np.errstate(over='ignore'):
        p_left = lapse_left + (1 - lapse_left - lapse_right) / (1 + np.exp(-beta * (x - x0)))
    # Keep probabilities strictly inside (0, 1): with a lapse of 0 a saturated curve
    # would otherwise give log(0) = -inf and 0 * -inf = nan, breaking the optimiser
    eps = np.finfo(float).eps
    p_left = np.clip(p_left, eps, 1 - eps)
    # Negative log-likelihood
    nll = -np.sum(y * np.log(p_left) + (1 - y) * np.log(1 - p_left))
    return nll

# Cross-validation setup: 5 shuffled folds with a fixed seed for reproducibility
kf = KFold(n_splits=5, shuffle=True, random_state=42)
log_losses = []

# Loop invariants: the data arrays and the starting point of every fit
cv_x = df_test['visual_stimulus_ratio'].values
cv_y = df_test['left_choice'].values
# Initial parameter guesses: [lapse_left, lapse_right, beta, x0]
initial_params = [0.05, 0.05, 1, 0]

# Perform cross-validation
for train_index, test_index in kf.split(df_test):
    # Split the data
    x_train, x_test = cv_x[train_index], cv_x[test_index]
    y_train, y_test = cv_y[train_index], cv_y[test_index]

    # Fit the model on the training data
    result = minimize(
        lapse_logistic_independent,
        initial_params,
        args=(x_train, y_train),
        bounds=[(0, 0.5), (0, 0.5), (None, None), (None, None)]
    )

    # Extract fitted parameters
    lapse_left, lapse_right, beta, x0 = result.x

    # Predicted probability of a left choice on the held-out trials
    p_left_test = lapse_left + (1 - lapse_left - lapse_right) / (1 + np.exp(-beta * (x_test - x0)))

    # Log loss on the held-out trials; the labels are given explicitly so a fold
    # that happens to contain a single class does not make log_loss fail
    loss = log_loss(y_test, p_left_test, labels=[0, 1])
    log_losses.append(loss)

# Print cross-validation results
print(f"Cross-Validation Log Losses: {log_losses}")
print(f"Mean Log Loss: {np.mean(log_losses)}")
print(f"Standard deviation of Log Loss: {np.std(log_losses)}")

In [None]:
# independent lapses model applied to the difference

# Initial parameter guesses: [lapse_left, lapse_right, beta, x0]
initial_params = [0.05, 0.05, 1, 0]

# Fit the model on the signed intensity difference
x = df_test['visual_stimulus_diff'].values
y = df_test['left_choice'].values
result = minimize(
    lapse_logistic_independent,
    initial_params,
    args=(x, y),
    bounds=[(0, 0.5), (0, 0.5), (None, None), (None, None)]
)

# Extract fitted parameters
lapse_left, lapse_right, beta, x0 = result.x
print(f"Lapse Left: {lapse_left}, Lapse Right: {lapse_right}, Slope (Beta): {beta}, PSE (x0): {x0}")

# Generate predictions
xs = np.linspace(df_test['visual_stimulus_diff'].min(), df_test['visual_stimulus_diff'].max(), 100)
p_left = lapse_left + (1 - lapse_left - lapse_right) / (1 + np.exp(-beta * (xs - x0)))

# Plot the fitted curve
fig, ax = plt.subplots(figsize=(5, 5))
# Bin the difference into steps of 0.1 for better visualization.
# The quotient is rounded before flooring because plain floor division puts exact
# multiples of 0.1 in the bin below (0.3 // 0.1 == 2.0 in floating point).
df_test["visual_stimulus_diff_binned"] = np.floor(np.round(df_test['visual_stimulus_diff'] / 0.1, 6)) / 10
sns.pointplot(
    x='visual_stimulus_diff_binned',
    y='left_choice',
    data=df_test,
    estimator=lambda x: np.mean(x),
    color='blue',
    markers='o',
    errorbar=("ci", 95),
    ax=ax,
    label='Observed Choices',
    native_scale=True,
    linestyles='',
)
ax.plot(xs, p_left, color='red', label='Lapse Logistic Fit')
ax.set_xlabel("Visual Stimulus Difference")
ax.set_ylabel("Probability of Left Choice")
plt.title("Psychometric Curve with Independent Lapses")
plt.legend()
plt.show()

weight and stats for the different predictors:
- visual stimulus ratio (you call it deviation)
- visual stimulus diff. Nuo: change to "total intensity on left port"
- port where the animal is coming from
- interactions
- Nuo: add another regressor: the previous correct choice

We can play around with these things.

In [None]:
# Add the choice-history columns through the df_transforms helpers, in this order
for add_columns in (
    dft.add_mouse_first_choice,
    dft.add_mouse_last_choice,
    dft.add_port_where_animal_comes_from,
):
    df_test = add_columns(df_test)

# Numeric version of "previous_port_before_stimulus": left -> 1, right -> 0, anything else -> NaN
df_test['previous_port_before_stimulus_numeric'] = df_test['previous_port_before_stimulus'].apply(
    lambda port: {'left': 1, 'right': 0}.get(port, np.nan)
)
# Numeric version of "roa_choice": repeat -> 1, alternate -> 0, anything else -> NaN
# (the "roa_choice" column is presumably created by one of the helpers above -- TODO confirm)
df_test['roa_choice_numeric'] = df_test['roa_choice'].apply(
    lambda roa: {'repeat': 1, 'alternate': 0}.get(roa, np.nan)
)

In [None]:
# Add an interaction term between visual_stimulus_ratio and visual_stimulus_diff
def interaction_calc(row):
    """Product of the row's stimulus ratio and difference, signed by the correct side.

    The product is multiplied by +1 when 'correct_side' is 'left' and by -1 otherwise.
    """
    sign = -1
    if row['correct_side'] == 'left':
        sign = 1
    return row['visual_stimulus_ratio'] * row['visual_stimulus_diff'] * sign

# Evaluate interaction_calc once per trial (row-wise)
df_test['interaction_term'] = df_test.apply(interaction_calc, axis='columns')

In [None]:
# get the intensity of left stimulus for each trial
import ast
# Take the first element of the stimulus pair when left is the correct side,
# and the second element otherwise
df_test['left_ilumi'] = df_test.apply(
    lambda row: ast.literal_eval(row['visual_stimulus'])[0 if row['correct_side'] == 'left' else 1],
    axis=1
)

In [None]:
def interaction_calc_brightness(row):
    """Product of the row's left intensity and stimulus ratio, signed by the correct side.

    The product is multiplied by +1 when 'correct_side' is 'left' and by -1 otherwise.
    NOTE(review): elsewhere in this notebook 'visual_stimulus_ratio' is already signed
    by the correct side, so this extra sign removes it again -- confirm that is intended.
    """
    sign = -1
    if row['correct_side'] == 'left':
        sign = 1
    return row['left_ilumi'] * row['visual_stimulus_ratio'] * sign
# Evaluate interaction_calc_brightness once per trial (row-wise)
df_test['interaction_term_brightness'] = df_test.apply(interaction_calc_brightness, axis='columns')

In [None]:
# Regressor 'previous_choice_left_correct': 1 when the previous trial of the same
# subject and session ended with a correct left choice, 0 when it ended any other
# way, missing (None) when there is no previous trial.
if "last_choice" not in df_test.columns:
    df_test = dft.add_mouse_last_choice(df_test)

# Shift inside every subject/session group so a session never borrows the last trial
# of another one. This replaces the per-session loop that assigned columns on a
# filtered slice (SettingWithCopyWarning) and rescanned the frame for every session.
trials_by_session = df_test.groupby(['subject', 'session'], sort=False)
previous_last_choice = trials_by_session['last_choice'].shift(1)
previous_correct = trials_by_session['correct'].shift(1)

no_previous_trial = previous_last_choice.isna() | previous_correct.isna()
previous_left_correct = (previous_last_choice == 'left') & (previous_correct == True)
df_test['previous_choice_left_correct'] = pd.Series(
    np.where(no_previous_trial, None, previous_left_correct.astype(int)),
    index=df_test.index,
    dtype=object,
)


In [None]:
# Regressor 'previous_choice_right_wrong': 1 when the previous trial of the same
# subject and session ended with an incorrect right choice, 0 when it ended any
# other way, missing (None) when there is no previous trial.
if "last_choice" not in df_test.columns:
    df_test = dft.add_mouse_last_choice(df_test)

# Shift inside every subject/session group so a session never borrows the last trial
# of another one. This replaces the per-session loop that assigned columns on a
# filtered slice (SettingWithCopyWarning) and rescanned the frame for every session.
trials_by_session = df_test.groupby(['subject', 'session'], sort=False)
previous_last_choice = trials_by_session['last_choice'].shift(1)
previous_correct = trials_by_session['correct'].shift(1)

no_previous_trial = previous_last_choice.isna() | previous_correct.isna()
# "wrong" means the previous trial's 'correct' flag was False
previous_right_wrong = (previous_last_choice == 'right') & (previous_correct == False)
df_test['previous_choice_right_wrong'] = pd.Series(
    np.where(no_previous_trial, None, previous_right_wrong.astype(int)),
    index=df_test.index,
    dtype=object,
)


In [None]:
# Regressor 'previous_choice_left': 1 when the last choice of the previous trial
# (same subject and session) was left, 0 when it was anything else, missing (None)
# when there is no previous trial.
# The shift is done per subject/session group instead of looping over sessions and
# assigning columns on a filtered slice (SettingWithCopyWarning).
previous_last_choice = df_test.groupby(['subject', 'session'], sort=False)['last_choice'].shift(1)
df_test['previous_choice_left'] = pd.Series(
    np.where(previous_last_choice.isna(), None, (previous_last_choice == 'left').astype(int)),
    index=df_test.index,
    dtype=object,
)


In [None]:
import statsmodels.api as sm

parameters_for_fit = ['visual_stimulus_ratio',
                      'previous_port_before_stimulus_numeric',
                      'interaction_term',
                      'previous_choice_left_correct',
                      'previous_choice_right_wrong',
                      'previous_choice_left', 
                      'interaction_term_brightness'
                    ]
variable_for_prediction = 'left_choice'

# Drop the trials where any regressor or the outcome is missing
df_for_fit = df_test.dropna(subset=parameters_for_fit + [variable_for_prediction])

# Prepare the independent variables.
# The regressors are cast to float (not int): some columns are stored as object,
# and an integer cast would truncate the continuous ones such as the log ratio.
# df_for_fit keeps all its columns so the outcome can still be read from it below.
X_multi = df_for_fit[parameters_for_fit].astype(float).values
X_multi_const = sm.add_constant(X_multi)
y = df_for_fit[variable_for_prediction].values.astype(int)

# Fit the logistic regression model with multiple regressors
logit_model_multi = sm.Logit(y, X_multi_const).fit()

# Display the summary, which includes p-values for all regressors
print(logit_model_multi.summary(xname= ["intercept"] + parameters_for_fit))

In [None]:
from lecilab_behavior_analysis import plots


correct choice as output

In [None]:
# Regressors relating the current choice to the previous trial (same subject and session):
#   same_choice_correctPre : 1 when the previous trial was correct and the animal repeats that side
#   diff_choice_wrongPre   : 1 when the previous trial was wrong and the animal switches side
# Both are 0 otherwise and missing (None) when there is no previous trial.
if "last_choice" not in df_test.columns:
    df_test = dft.add_mouse_last_choice(df_test)

# Shift per subject/session group instead of looping over sessions and assigning
# columns on a filtered slice (SettingWithCopyWarning).
trials_by_session = df_test.groupby(['subject', 'session'], sort=False)
previous_last_choice = trials_by_session['last_choice'].shift(1)
previous_correct = trials_by_session['correct'].shift(1)
no_previous_trial = previous_last_choice.isna() | previous_correct.isna()

chose_left = df_test['left_choice'] == 1
chose_right = df_test['left_choice'] == 0
repeated_choice = ((previous_last_choice == 'left') & chose_left) | ((previous_last_choice == 'right') & chose_right)
switched_choice = ((previous_last_choice == 'left') & chose_right) | ((previous_last_choice == 'right') & chose_left)

df_test['same_choice_correctPre'] = pd.Series(
    np.where(no_previous_trial, None, (repeated_choice & (previous_correct == True)).astype(int)),
    index=df_test.index,
    dtype=object,
)
df_test['diff_choice_wrongPre'] = pd.Series(
    np.where(no_previous_trial, None, (switched_choice & (previous_correct == False)).astype(int)),
    index=df_test.index,
    dtype=object,
)

In [None]:
# Parse the stimulus strings once; literal_eval only accepts Python literals,
# whereas eval() would execute whatever expression the CSV contains.
stimulus_pairs = df_test['visual_stimulus'].apply(ast.literal_eval)
# Second element -> 'wrong_bright', first element -> 'correct_bright' (absolute values)
df_test['wrong_bright'] = stimulus_pairs.apply(lambda pair: abs(pair[1]))
df_test['correct_bright'] = stimulus_pairs.apply(lambda pair: abs(pair[0]))
df_test['bright_relative_correct_1'] = df_test['correct_bright'] / (df_test['wrong_bright'] + df_test['correct_bright'])
df_test['bright_relative_correct_2'] = df_test['correct_bright'] / (df_test['wrong_bright'] - df_test['correct_bright'])

# Drop the trials without a previous-trial regressor and store both regressors as int.
# Rebinding df_test (instead of inplace=True) keeps the cell free of chained-assignment warnings.
df_test = (
    df_test
    .dropna(subset=['same_choice_correctPre', 'diff_choice_wrongPre'])
    .astype({'same_choice_correctPre': int, 'diff_choice_wrongPre': int})
)

In [None]:
# Integer copy of the 'correct' flag, used as the outcome of the fit further down
df_test = df_test.assign(correct_numeric=df_test['correct'].astype(int))

In [None]:
# Unsigned evidence: magnitude of the signed stimulus ratio
df_test['abs_visual_stimulus_ratio'] = np.abs(df_test['visual_stimulus_ratio'])

In [None]:
# Previous-trial information, taken within each subject/session:
#   previous_choice      : last choice of the previous trial
#   previous_correct     : 'correct' flag of the previous trial
#   same_choice_previous : 1 when the current choice repeats the previous one, 0 when it
#                          differs, missing (None) without a previous choice or current choice
if "last_choice" not in df_test.columns:
    df_test = dft.add_mouse_last_choice(df_test)

# Shift per subject/session group instead of looping over sessions and assigning
# columns on a filtered slice (SettingWithCopyWarning).
trials_by_session = df_test.groupby(['subject', 'session'], sort=False)
previous_last_choice = trials_by_session['last_choice'].shift(1)
previous_correct = trials_by_session['correct'].shift(1)

df_test['previous_choice'] = previous_last_choice
df_test['previous_correct'] = previous_correct

# Add the same choice as previous, where 1 is the same choice and 0 is a different choice
undefined_repeat = previous_last_choice.isna() | df_test['left_choice'].isna()
repeated_choice = (
    ((previous_last_choice == 'left') & (df_test['left_choice'] == 1))
    | ((previous_last_choice == 'right') & (df_test['left_choice'] == 0))
)
df_test['same_choice_previous'] = pd.Series(
    np.where(undefined_repeat, None, repeated_choice.astype(int)),
    index=df_test.index,
    dtype=object,
)


In [None]:
# Keep only the trials that have both previous-trial columns and store them as integers.
# NOTE(review): an earlier cell already dropped the trials without a previous trial, and the
# history is recomputed on the reduced frame afterwards, so this looks like it removes one
# more trial per session -- confirm that this is acceptable.
df_test = (
    df_test
    .dropna(subset=['same_choice_previous', 'previous_correct'])
    .astype({'same_choice_previous': int, 'previous_correct': int})
)

In [None]:
# Four-level code combining "repeated or switched side" with the previous outcome
# (previous trial taken within each subject/session):
#    2 : same choice, previous trial correct      -2 : same choice, previous trial wrong
#    1 : different choice, previous trial correct -1 : different choice, previous trial wrong
# Missing (None) when the previous choice, previous outcome or current choice is missing.
if "last_choice" not in df_test.columns:
    df_test = dft.add_mouse_last_choice(df_test)

# Shift per subject/session group instead of looping over sessions and assigning
# columns on a filtered slice (SettingWithCopyWarning).
trials_by_session = df_test.groupby(['subject', 'session'], sort=False)
previous_last_choice = trials_by_session['last_choice'].shift(1)
previous_correct = trials_by_session['correct'].shift(1)

chose_left = df_test['left_choice'] == 1
chose_right = df_test['left_choice'] == 0
repeated_choice = ((previous_last_choice == 'left') & chose_left) | ((previous_last_choice == 'right') & chose_right)
switched_choice = ((previous_last_choice == 'left') & chose_right) | ((previous_last_choice == 'right') & chose_left)
previous_was_correct = previous_correct == True
previous_was_wrong = previous_correct == False

# Define conditions for the 4 groups, in the same order as `values`
conditions = [
    (repeated_choice & previous_was_correct).to_numpy(),
    (repeated_choice & previous_was_wrong).to_numpy(),
    (switched_choice & previous_was_correct).to_numpy(),
    (switched_choice & previous_was_wrong).to_numpy(),
]
values = [2, -2, 1, -1] # same choice correct, same choice wrong, different choice correct, different choice wrong

# Assign group values; trials matching no group or missing an input stay None
group_code = np.select(condlist=conditions, choicelist=values, default=None)
missing_input = previous_last_choice.isna() | previous_correct.isna() | df_test['left_choice'].isna()
df_test['previous_choice_same_correct'] = pd.Series(
    np.where(missing_input, None, group_code),
    index=df_test.index,
    dtype=object,
)


In [None]:
# Keep only the trials that received a group code and store the code as an integer
df_test = (
    df_test
    .dropna(subset=['previous_choice_same_correct'])
    .astype({'previous_choice_same_correct': int})
)

In [None]:
# z-score 'wrong_bright' separately within every level of the absolute stimulus ratio
wrong_bright_by_ratio = df_test.groupby('abs_visual_stimulus_ratio')['wrong_bright']
df_test['wrong_bright_zscore'] = wrong_bright_by_ratio.transform(
    lambda level: (level - level.mean()) / level.std()
)

In [None]:
# Regressors of the model; the commented entries are alternatives that were tried
parameters_for_fit = [
    'abs_visual_stimulus_ratio',
    'wrong_bright',
    # 'wrong_bright_zscore',
    # 'bright_relative_correct_1',
    # 'bright_relative_correct_2',
    # 'same_choice_correctPre',
    # 'diff_choice_wrongPre',
    'same_choice_previous',
    'previous_correct',
    # 'previous_choice_same_correct',
]
# Outcome: whether the trial was correct (0/1)
variable_for_prediction = 'correct_numeric'

# Keep only the trials where every regressor and the outcome are present
required_columns = parameters_for_fit + [variable_for_prediction]
df_for_fit = df_test.dropna(subset=required_columns)

# Design matrix with an added constant column, and the integer outcome vector
X_multi = df_for_fit[parameters_for_fit].to_numpy()
X_multi_const = sm.add_constant(X_multi)
y = df_for_fit[variable_for_prediction].to_numpy().astype(int)

# Fit the multi-regressor logistic model and print its summary (p-values included)
logit_model_multi = sm.Logit(y, X_multi_const).fit()
regressor_names = ["intercept"] + parameters_for_fit
print(logit_model_multi.summary(xname=regressor_names))

Psychometric curves split by the previous trial's outcome (correct vs. wrong)

In [None]:
# One panel per side chosen on the previous trial (left panel = left, right panel = right);
# inside a panel, one curve per value of 'previous_correct'
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
for panel, previous_side in zip(ax, ['left', 'right']):
    side_trials = df_test[df_test['previous_choice'] == previous_side]
    for (outcome, outcome_trials), linecolor in zip(side_trials.groupby('previous_correct'), ['red', 'green']):
        plots.psychometric_plot_by_discreVal(
            df=outcome_trials,
            x='visual_stimulus_ratio',
            y='left_choice',
            ax=panel,
            markercolor='k',
            markers='o',
            errorbar=("ci", 95),
            markerlabel=None,
            markersize=5,
            linecolor=linecolor,
            linelabel='previous ' + str(outcome),
        )
ax[0].legend()
ax[0].set_title("Left Choice Previous")
ax[1].legend()
ax[1].set_title("Right Choice Previous")

In [None]:
# One panel per outcome of the previous trial (left panel = correct, right panel = incorrect);
# inside a panel, one curve per value of 'previous_choice'
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
for panel, was_correct in zip(ax, [True, False]):
    outcome_trials = df_test[df_test['previous_correct'] == was_correct]
    for (side, side_trials), linecolor in zip(outcome_trials.groupby('previous_choice'), ['gold', 'lightskyblue']):
        plots.psychometric_plot_by_discreVal(
            df=side_trials,
            x='visual_stimulus_ratio',
            y='left_choice',
            ax=panel,
            markercolor='k',
            markers='o',
            errorbar=("ci", 95),
            markerlabel=None,
            markersize=5,
            linecolor=linecolor,
            linelabel='previous ' + str(side),
        )
ax[0].legend()
ax[0].set_title("Correct Choice Previous")
ax[1].legend()
ax[1].set_title("Incorrect Choice Previous")

Matrix format

In [None]:
# Use the absolute value of the lower of the two visual stimulus values as a proxy
# for the brightness of the visual stimulus.
# NOTE(review): `eval` executes arbitrary code. It is kept because the stored format of
# `visual_stimulus` is not visible from this cell; if the column only holds literal
# tuples/lists, prefer `ast.literal_eval`.
stimulus_pairs = df_test['visual_stimulus'].apply(lambda s: eval(s))  # parse each string once
df_test['visual_stimulus_lowest'] = stimulus_pairs.apply(
    lambda pair: abs(pair[0]) if pair[0] < pair[1] else abs(pair[1])
)
# create 10 equal-width bins (11 edges) spanning the observed range
min_value = df_test['visual_stimulus_lowest'].min()
max_value = df_test['visual_stimulus_lowest'].max()
bins = np.linspace(min_value, max_value, 11)
# include_lowest=True closes the first interval on the left. Without it, trials whose
# value equals `min_value` fall outside every bin, are labelled NaN and silently
# disappear from the pivot table below.
df_test['visual_stimulus_lowest_binned'] = pd.cut(
    df_test['visual_stimulus_lowest'],
    bins=bins,
    labels=[f"{b:.2f}" for b in bins[:-1]],  # each bin is labelled by its left edge
    include_lowest=True,
)
# mean left-choice rate for every (brightness bin, stimulus ratio) combination
pivot_table_abs = df_test.pivot_table(
    index='visual_stimulus_lowest_binned',
    columns='visual_stimulus_ratio',
    values='left_choice',
    aggfunc='mean',
    observed=True
)
# plot the heatmap
plt.figure(figsize=(5, 5))
sns.heatmap(pivot_table_abs, cmap='coolwarm', annot=True, fmt=".2f", cbar_kws={'label': 'Probability of Left Choice'})
plt.xlabel("Visual Stimulus ratio")
plt.ylabel("Absolute Value of Lowest Visual Stimulus")
plt.title("Heatmap of Probability of Left Choice")
# keep the bin labels horizontal and tilt the ratio labels so they do not overlap
plt.yticks(rotation=0)
plt.xticks(rotation=45, ha='right')
plt.show()

In [None]:
# transform visual_stimulus_lowest_binned to a numeric value for plotting
df_test['visual_stimulus_lowest_binned_num'] = pd.to_numeric(df_test['visual_stimulus_lowest_binned'], errors='coerce')

# One panel per value of `previous_port_before_stimulus` (left / right). Within each
# panel, fit one logistic regression per stimulus ratio: left choice as a function of
# the binned lowest stimulus value.
fig, axs = plt.subplots(1, 2, figsize=(12, 5), sharey=True)
for ax, side in zip(axs.ravel(), ['left', 'right']):
    df_side = df_test[df_test['previous_port_before_stimulus'] == side]
    for i in df_side.groupby('visual_stimulus_ratio'):
        # sort so the fitted curve is drawn left to right, then drop trials without a bin
        df_i = i[1].sort_values(by='visual_stimulus_lowest_binned_num')
        df_i = df_i.dropna(subset=['visual_stimulus_lowest_binned_num'])
        X = df_i['visual_stimulus_lowest_binned_num'].values.reshape(-1, 1)
        y = df_i['left_choice'].values.astype(int)
        # LogisticRegression.fit raises when the target has fewer than two classes
        # (e.g. the animal always chose the same side for this ratio), so skip those groups.
        if np.unique(y).size < 2:
            print(f"Skipping ratio {i[0]} ({side}): 'left_choice' has a single class, no fit possible")
            continue
        model = LogisticRegression()
        model.fit(X, y)
        y_prob = model.predict_proba(X)[:, 1]  # probability of class 1 (left choice)
        ax.plot(X, y_prob, label=f"Visual Stimulus ratio: {i[0]}")
    ax.set_xlabel("Absolute Value of Lowest Visual Stimulus")
    ax.set_ylabel("Probability of Left Choice")
    ax.legend()
    ax.set_title(f"Last Choice Before Stimulus: {side.capitalize()}")
plt.show()

Fit the lapse model separately for trials where the animal came from the left port and from the right port

In [None]:
# Fit the four-parameter lapse logistic (left lapse, right lapse, slope, PSE)
# separately for each value of `previous_port_before_stimulus` and overlay the
# fitted curves on the observed left-choice rates.
fig, ax = plt.subplots(1, 1, figsize=(5, 5))

colors = ["blue", "orange"]
# both lapse rates are constrained to [0, 0.5]; slope and PSE are unbounded
param_bounds = [(0, 0.5), (0, 0.5), (None, None), (None, None)]

for color, side in zip(colors, ['left', 'right']):
    df_side = df_test[df_test['previous_port_before_stimulus'] == side]
    ratio_col = df_side['visual_stimulus_ratio']

    # Fit the model
    x = ratio_col.values
    y = df_side['left_choice'].values
    result = minimize(lapse_logistic_independent, initial_params, args=(x, y), bounds=param_bounds)

    # Extract fitted parameters
    lapse_left, lapse_right, beta, x0 = result.x
    print(f"Side: {side}, Lapse Left: {lapse_left}, Lapse Right: {lapse_right}, Slope (Beta): {beta}, PSE (x0): {x0}")

    # Evaluate the fitted curve on a dense grid spanning the observed ratios
    xs = np.linspace(ratio_col.min(), ratio_col.max(), 100)
    p_left = lapse_left + (1 - lapse_left - lapse_right) / (1 + np.exp(-beta * (xs - x0)))

    # Observed mean left-choice rate per ratio (markers only), then the fitted curve
    sns.pointplot(
        data=df_side,
        x='visual_stimulus_ratio',
        y='left_choice',
        estimator=lambda vals: np.mean(vals),
        errorbar=("ci", 95),
        color=color,
        markers='o',
        linestyles='',
        native_scale=True,
        label=f'Choices when coming from {side}',
        ax=ax,
    )
    ax.plot(xs, p_left, color=color, label='Lapse Logistic Fit')

# Labelling does not depend on the side, so it is done once after both sides are drawn
ax.set_xlabel("Visual Stimulus ratio")
ax.set_ylabel("Probability of Left Choice")
ax.set_title("Psychometric Curves")
ax.legend()
plt.show()

I kept what you did for comparison here

In [None]:
# Compare the effect of the relative difference between the two visual stimuli (the
# ratio) with the effect of the absolute difference between them: for every ratio,
# fit left choice as a logistic function of the signed stimulus difference.
#
# Possible follow-up: train another logistic regression that also includes the
# absolute difference between the two stimuli as a regressor, and see how it
# affects the probability of a left choice.

for i in df_test.groupby('visual_stimulus_ratio'):
    # sort so the fitted curve is drawn left to right
    df_i = i[1].sort_values(by='visual_stimulus_diff')
    X = df_i['visual_stimulus_diff'].values.reshape(-1, 1)
    y = df_i['left_choice'].values.astype(int)
    # LogisticRegression.fit raises when the target has fewer than two classes,
    # so skip ratios for which the animal always chose the same side.
    if np.unique(y).size < 2:
        print(f"Skipping ratio {i[0]}: 'left_choice' has a single class, no fit possible")
        continue
    model = LogisticRegression()
    model.fit(X, y)
    y_prob = model.predict_proba(X)[:, 1]  # probability of class 1 (left choice)
    plt.plot(X, y_prob, label=f"Visual Stimulus ratio: {i[0]}")
# build the legend once, after every curve has been drawn
plt.legend()
plt.xlabel("Visual Stimulus Difference")
plt.ylabel("Probability of Left Choice")
plt.show()

Multi-animal analysis


In [None]:
# Sync the session data of every animal in the project from the cluster and load
# each animal's CSV into a dictionary keyed by animal name.
project = tv_projects[1]
df_dic = {}
for mouse in animals:
    local_path = Path(utils.get_outpath()) / project / "sessions" / mouse
    # make sure the target directory exists before syncing into it
    local_path.mkdir(parents=True, exist_ok=True)
    utils.rsync_session_data(
        project_name=project,
        animal=mouse,
        local_path=str(local_path),
        credentials=utils.get_idibaps_cluster_credentials(),
    )
    # the session table is a semicolon-separated file named after the animal
    df_dic[mouse] = pd.read_csv(local_path / f'{mouse}.csv', sep=";")

In [None]:
# For every animal that reached the "TwoAFC_visual_hard" stage, keep only those trials
# and derive the regressors used below: signed stimulus ratio, signed stimulus
# difference, binned difference, and a binary left-choice indicator.
df_dic_hard = {}
for df_name, df in df_dic.items():
    if 'TwoAFC_visual_hard' in df["current_training_stage"].unique():
        df = df.dropna(subset=['visual_stimulus'])
        # .copy() makes the filtered frame independent of the raw one, so the column
        # assignments below do not write to a slice (SettingWithCopyWarning).
        df = df[df["current_training_stage"] == "TwoAFC_visual_hard"].copy()

        # NOTE(review): `eval` executes arbitrary code. It is kept because the stored
        # format of `visual_stimulus` is not visible here; if the column only holds
        # literal tuples/lists, prefer `ast.literal_eval`.
        stim_pairs = df['visual_stimulus'].apply(lambda s: eval(s))  # parse each string once
        # positive when the correct side is left, negative otherwise
        side_sign = np.where(df['correct_side'] == 'left', 1, -1)

        df['visual_stimulus_ratio'] = stim_pairs.apply(lambda p: abs(round(p[0] / p[1], 4))) * side_sign
        df['visual_stimulus_diff'] = stim_pairs.apply(lambda p: abs(p[0] - p[1])) * side_sign
        # floor-divide into bins of width 0.1
        df["visual_stimulus_diff_binned"] = df['visual_stimulus_diff'] // 0.1
        df = dft.add_mouse_first_choice(df)
        # 1 when the first choice was left, 0 for anything else
        df['left_choice'] = (df['first_choice'] == 'left').astype(int)

        df_dic_hard[df_name] = df

In [None]:
# Per-animal logistic regression of left choice on the stimulus ratio, the binned
# stimulus difference and their interaction. One column per animal collects the
# p-values and the coefficients of the fitted model.
df_ratio_diffBin_inter_p = pd.DataFrame()
df_ratio_diffBin_inter_coef = pd.DataFrame()
regressors_binned = ['visual_stimulus_ratio', 'visual_stimulus_diff_binned', 'interaction_term']
for df_name, df in df_dic_hard.items():
    # the interaction term is computed row by row and stored on the animal's frame
    df['interaction_term'] = df.apply(interaction_calc, axis=1)

    # Design matrix: the three regressors plus an intercept column
    X_multi = df[regressors_binned]
    X_multi_const = sm.add_constant(X_multi)
    y = df['left_choice'].values.astype(int)

    # Fit the logistic regression model with multiple regressors
    logit_model_multi = sm.Logit(y, X_multi_const).fit()

    df_ratio_diffBin_inter_p[df_name] = logit_model_multi.pvalues
    df_ratio_diffBin_inter_coef[df_name] = logit_model_multi.params

In [None]:
# Per-animal logistic regression of left choice on the stimulus ratio, the raw
# (unbinned) stimulus difference and their interaction. One column per animal
# collects the p-values and the coefficients of the fitted model.
df_ratio_diff_inter_p = pd.DataFrame()
df_ratio_diff_inter_coef = pd.DataFrame()
regressors_raw = ['visual_stimulus_ratio', 'visual_stimulus_diff', 'interaction_term']
for df_name, df in df_dic_hard.items():
    # the interaction term is computed row by row and stored on the animal's frame
    df['interaction_term'] = df.apply(interaction_calc, axis=1)

    # Design matrix: the three regressors plus an intercept column
    X_multi = df[regressors_raw]
    X_multi_const = sm.add_constant(X_multi)
    y = df['left_choice'].values.astype(int)

    # Fit the logistic regression model with multiple regressors
    logit_model_multi = sm.Logit(y, X_multi_const).fit()

    df_ratio_diff_inter_p[df_name] = logit_model_multi.pvalues
    df_ratio_diff_inter_coef[df_name] = logit_model_multi.params

In [None]:
# Plot, for every animal, the p-value of each regressor in the two models
# (solid: raw stimulus difference, dashed: binned stimulus difference).

# Shorten the regressor names so both models share the same x-axis categories.
# Reassigning (instead of inplace=True) keeps the cell safe to re-run.
df_ratio_diffBin_inter_p = df_ratio_diffBin_inter_p.rename(
    index={'visual_stimulus_ratio': 'ratio', 'visual_stimulus_diff_binned': 'diff', 'interaction_term': 'inter'}
)
df_ratio_diff_inter_p = df_ratio_diff_inter_p.rename(
    index={'visual_stimulus_ratio': 'ratio', 'visual_stimulus_diff': 'diff', 'interaction_term': 'inter'}
)

fig_pvals, ax_pvals = plt.subplots()
for df_name, color in zip(df_dic_hard.keys(), sns.color_palette("colorblind", len(df_dic_hard))):
    # a space separates the animal name from the model name in the legend
    ax_pvals.plot(df_ratio_diff_inter_p[df_name], label=f"{df_name} ratio_diff_inter", color=color)
    ax_pvals.plot(df_ratio_diffBin_inter_p[df_name], label=f"{df_name} ratio_diffBin_inter", color=color, linestyle='--')
ax_pvals.axhline(y=0.05, color='k', linestyle='--', label='p-value threshold')
ax_pvals.set_xlabel("Regressors")
ax_pvals.set_ylabel("p-value")
# place the legend outside the axes, anchored at the lower-right corner
ax_pvals.legend(loc=(1, 0))
plt.show()

In [None]:
# Plot, for every animal, the fitted coefficient of each regressor in the two models
# (solid: raw stimulus difference, dashed: binned stimulus difference).

# Shorten the regressor names so both models share the same x-axis categories.
# Reassigning (instead of inplace=True) keeps the cell safe to re-run.
df_ratio_diffBin_inter_coef = df_ratio_diffBin_inter_coef.rename(
    index={'visual_stimulus_ratio': 'ratio', 'visual_stimulus_diff_binned': 'diff', 'interaction_term': 'inter'}
)
df_ratio_diff_inter_coef = df_ratio_diff_inter_coef.rename(
    index={'visual_stimulus_ratio': 'ratio', 'visual_stimulus_diff': 'diff', 'interaction_term': 'inter'}
)

fig_coefs, ax_coefs = plt.subplots()
for df_name, color in zip(df_dic_hard.keys(), sns.color_palette("colorblind", len(df_dic_hard))):
    # a space separates the animal name from the model name in the legend
    ax_coefs.plot(df_ratio_diff_inter_coef[df_name], label=f"{df_name} ratio_diff_inter", color=color)
    ax_coefs.plot(df_ratio_diffBin_inter_coef[df_name], label=f"{df_name} ratio_diffBin_inter", color=color, linestyle='--')
ax_coefs.set_xlabel("Regressors")
ax_coefs.set_ylabel("Coefficient")
# place the legend outside the axes, anchored at the lower-right corner
ax_coefs.legend(loc=(1, 0))
plt.show()