In [None]:
import pandas as pd
import lecilab_behavior_analysis.utils as utils
from pathlib import Path
%load_ext autoreload
%autoreload 2


In [None]:
# mouse = "mouse2"
# df = utils.load_example_data(mouse)

In [None]:
# load data from cluster
# Fetch the list of projects through the lab helper; later cells select one
# entry of this list by position (tv_projects[1]), so check the printed order.
tv_projects = utils.get_server_projects()
print(tv_projects)


In [None]:

# see the available animals
# Query the animals of the second listed project (index 1); `animals` is
# iterated further down to download every subject.
animals = utils.get_animals_in_project(tv_projects[1])
print(animals)

In [None]:
# Pick one subject and mirror its session data into the local output folder.
mouse = "ACV007"

# <outpath>/<project>/sessions/<mouse>
local_path = Path(utils.get_outpath(), tv_projects[1], "sessions", mouse)
# Make sure the destination exists before rsync writes into it.
local_path.mkdir(parents=True, exist_ok=True)

# Pull the session files from the cluster.
utils.rsync_session_data(
    project_name=tv_projects[1],
    animal=mouse,
    local_path=str(local_path),
    credentials=utils.get_idibaps_cluster_credentials(),
)

In [None]:
# Read the subject's semicolon-separated session table from the synced folder.
session_csv = local_path / f"{mouse}.csv"
df = pd.read_csv(session_csv, sep=";")

In [None]:
# import lecilab_behavior_analysis.plots as plots
# import lecilab_behavior_analysis.df_transforms as dft
# df = dft.fill_missing_data(df)

# # add a column with the date for the day
# df = dft.add_day_column_to_df(df)

# # create a figure with 1 axis for the calendar plot
# import matplotlib.pyplot as plt
# fig, ax_cal = plt.subplots(figsize=(10, 5), dpi=300)
# # generate the calendar plot
# dates_df = dft.get_dates_df(df)
# cal_image = plots.rasterize_plot(plots.training_calendar_plot(dates_df), dpi=300)
# # paste the calendar plot filling the entire axis
# ax_cal.imshow(cal_image)
# ax_cal.axis("off")

# plt.show()

In [None]:
# from lecilab_behavior_analysis.figure_maker import subject_progress_figure
# fig = subject_progress_figure(df, perf_window=100, summary_matrix_plot=False)

In [None]:
# from lecilab_behavior_analysis.figure_maker import session_summary_figure
# from lecilab_behavior_analysis.df_transforms import add_trial_of_day_column_to_df, add_day_column_to_df
# # select the session you want to plot
# date = "2025-05-06"
# df = add_day_column_to_df(df)
# df = add_trial_of_day_column_to_df(df)
# sdf = df[df["year_month_day"] == date]
# fig = session_summary_figure(sdf, mouse, perf_window=15, width=10, height=5)

In [None]:
# df.date.astype('datetime64[ns]').dt.strftime("%Y-%m-%d").head(10)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy
from scipy.optimize import curve_fit
import lecilab_behavior_analysis.df_transforms as dft
from sklearn.linear_model import LogisticRegression
import seaborn as sns

In [None]:
# Keep only trials that have a visual stimulus recorded; `df` itself is left untouched.
df_test = df.dropna(subset = ['visual_stimulus'])

In [None]:
# reduce the dataset to the psychometric version of the task
# Otherwise, we would include a lot of "easy" trials that would bias the fit
# .copy() detaches the selection from the parent frame: the following cells add
# columns to df_test, which on a bare boolean selection triggers pandas'
# SettingWithCopyWarning.
df_test = df_test[df_test["current_training_stage"] == "TwoAFC_visual_hard"].copy()

In [None]:
# Parse every "visual_stimulus" entry once into a pair of numbers.
# NOTE(review): eval() executes whatever text is stored in the CSV that was
# rsynced from the cluster. If the column only ever holds plain number pairs,
# ast.literal_eval would be the safe replacement -- TODO confirm the stored format.
stim_pairs = df_test['visual_stimulus'].apply(eval)

# +1 on trials whose correct side is left, -1 otherwise. Signing the regressors
# this way is what makes a single fit work for both left and right trials.
side_sign = np.where(df_test['correct_side'] == 'left', 1, -1)

# Signed ratio between the two stimulus values (rounded to 4 decimals).
df_test['visual_stimulus_devi'] = (
    stim_pairs.apply(lambda pair: abs(round(pair[0] / pair[1], 4))) * side_sign
)
# Signed absolute difference between the two stimulus values.
df_test['visual_stimulus_diff'] = (
    stim_pairs.apply(lambda pair: abs(pair[0] - pair[1])) * side_sign
)

# Bin the difference in steps of 0.1 (floor). Plain `// 0.1` is unreliable on
# floats (0.4 // 0.1 gives 3.0 while -0.4 // 0.1 gives -4.0), so the quotient is
# rounded to 6 decimals before flooring to remove the representation error.
df_test["visual_stimulus_diff_binned"] = np.floor(
    (df_test['visual_stimulus_diff'] / 0.1).round(6)
)

# !!!!! This introduces a bug!! What would happen on the trials where the mouse has to go right? Which value would be used then?
# df_test['left_choice'] = np.where((df_test['correct_side'] == 'left') & (df_test['correct'] == True), 1, 0)

# What we want is a value between 0 and 1 describing the probability of a left
# choice. For these fits the correct side does not matter; only whether the
# mouse chose left or right.
#
# Earlier versions inferred the left-choice probability back from performance
# and trial difficulty. Using the recorded first choice is simpler and less
# confusing.

# df_transforms already provides a helper for the mouse's first choice; it adds
# the column "first_choice" ("left" or "right") for each trial.
df_test = dft.add_mouse_first_choice(df_test)

# Encode the choice as 1 = left, 0 = anything else (right).
df_test['left_choice_new'] = (df_test['first_choice'] == 'left').astype(int)

# The column names are deliberately provisional so the different solutions can
# be compared side by side; once settled, clean up and use meaningful names.

In [None]:
# df_test_sorted = df_test.sort_values(by='visual_stimulus_diff')
# X = df_test_sorted['visual_stimulus_diff'].values.reshape(-1, 1)
# y = df_test_sorted['left_choice'].values.astype(int)
# model = LogisticRegression()
# model.fit(X, y)
# y_pred = model.predict(X)
# y_prob = model.predict_proba(X)[:, 1]
# plt.plot(X, y_prob)
# plt.xlabel("Visual Stimulus Difference")
# plt.ylabel("Probability of Left Choice")

In [None]:
# Fit P(left choice) as a logistic function of the signed stimulus deviation.
X = df_test[['visual_stimulus_devi']].to_numpy()
y = df_test['left_choice_new'].to_numpy().astype(int)
model = LogisticRegression().fit(X, y)

# The fitted model can be evaluated at ANY deviation, so for the curve we sample
# 100 evenly spaced values across the observed range.
deviation = df_test['visual_stimulus_devi']
xs = np.linspace(deviation.min(), deviation.max(), 100)[:, np.newaxis]
y_prob = model.predict_proba(xs)[:, 1]

# Observed choice fractions (mean with 95% CI) per deviation value.
fig, ax = plt.subplots(figsize=(5, 5))
sns.pointplot(
    data=df_test,
    x='visual_stimulus_devi',
    y='left_choice_new',
    estimator=np.mean,
    errorbar=("ci", 95),
    native_scale=True,
    color='blue',
    markers='o',
    linestyles='',
    label='Observed Choices',
    ax=ax,
)

# Fitted curve on top of the data.
ax.plot(xs, y_prob, color='red', label='Logistic Regression Fit')
ax.set(
    xlabel="Visual Stimulus Deviation",
    ylabel="Probability of Left Choice",
    title="Psychometric Curve",
)
ax.legend()
plt.show()



In [None]:
# Fit P(left choice) as a logistic function of the binned stimulus difference.
X = df_test[['visual_stimulus_diff_binned']].to_numpy()
y = df_test['left_choice_new'].to_numpy().astype(int)
model = LogisticRegression().fit(X, y)

# Open question: why fit on the binned difference and then interpolate for the
# prediction, instead of fitting on all the raw difference values?

# The fitted model can be evaluated at ANY value of the regressor, so for the
# curve we sample 100 evenly spaced values across the observed range.
binned_diff = df_test['visual_stimulus_diff_binned']
xs = np.linspace(binned_diff.min(), binned_diff.max(), 100)[:, np.newaxis]
y_prob = model.predict_proba(xs)[:, 1]

# Observed choice fractions (mean with 95% CI) per bin.
fig, ax = plt.subplots(figsize=(5, 5))
sns.pointplot(
    data=df_test,
    x='visual_stimulus_diff_binned',
    y='left_choice_new',
    estimator=np.mean,
    errorbar=("ci", 95),
    native_scale=True,
    color='blue',
    markers='o',
    linestyles='',
    label='Observed Choices',
    ax=ax,
)

# Fitted curve on top of the data.
ax.plot(xs, y_prob, color='red', label='Logistic Regression Fit')
ax.set(
    xlabel="Visual Stimulus Difference",
    ylabel="Probability of Left Choice",
    title="Psychometric Curve",
)
ax.legend()
plt.show()



In [None]:
# split by difficulty
# Observed left-choice fraction per difference bin, one colour per difficulty.
# The x axis is left categorical (native_scale stays disabled, as before).
fig, ax = plt.subplots()
sns.pointplot(
    x='visual_stimulus_diff_binned',
    y='left_choice_new',
    data=df_test,
    estimator=lambda x: np.mean(x),
    markers='o',
    errorbar=("ci", 95),
    # native_scale= True,
    linestyles='',
    hue="difficulty",
    ax=ax,
)
ax.set_xlabel("Visual Stimulus Difference (binned)")
ax.set_ylabel("Probability of Left Choice")
ax.set_title("Observed Choices by Difficulty")
# The cell used to end with a bare plt.plot(), which draws nothing; render the figure instead.
plt.show()

In [None]:
import statsmodels.api as sm

# Add an interaction term between visual_stimulus_devi and visual_stimulus_diff
def interaction_calc(row):
    """Return the signed deviation x binned-difference interaction for one trial.

    `row` must provide 'correct_side', 'visual_stimulus_devi' and
    'visual_stimulus_diff_binned'. The product of the two regressors is
    multiplied by +1 when the correct side is 'left' and by -1 otherwise.
    """
    side_sign = -1 if row['correct_side'] != 'left' else 1
    product = row['visual_stimulus_devi'] * row['visual_stimulus_diff_binned']
    return product * side_sign
# Evaluate the interaction for every trial (row-wise).
df_test['interaction_term'] = df_test.apply(interaction_calc, axis=1)

# Design matrix: deviation, binned difference, their signed interaction, plus an intercept.
regressor_columns = ['visual_stimulus_devi', 'visual_stimulus_diff_binned', 'interaction_term']
X_multi = df_test.loc[:, regressor_columns]
X_multi_const = sm.add_constant(X_multi)
# Response: 1 when the first choice was left, 0 otherwise.
y = df_test['left_choice_new'].to_numpy().astype(int)

# Maximum-likelihood logistic regression on all three regressors.
logit_spec = sm.Logit(y, X_multi_const)
logit_model_multi = logit_spec.fit()

# The summary table reports coefficients and p-values for every regressor.
print(logit_model_multi.summary())

In [None]:
# It is interesting to compare the effects of the relative difference between the two visual stimuli,
# and the absolute difference between them.

# Maybe what we can do is to train another logistic regression model, adding as well the absolute difference
# between the two visual stimuli, and see how it affects the probability of a left choice.
# Do you know what I mean?

# One logistic fit per deviation value: P(left choice) as a function of the
# raw stimulus difference. Loop-local names are used on purpose so that the
# notebook-level X / y / model from the earlier cells are not overwritten.
fig, ax = plt.subplots()
for devi_value, df_i in df_test.groupby('visual_stimulus_devi'):
    df_i = df_i.sort_values(by='visual_stimulus_diff')
    X_group = df_i[['visual_stimulus_diff']].to_numpy()
    # The choice column created above is 'left_choice_new'; 'left_choice' is
    # never added to df_test and raised a KeyError here.
    y_group = df_i['left_choice_new'].to_numpy().astype(int)
    if np.unique(y_group).size < 2:
        # LogisticRegression cannot be fitted when only one class is present.
        print(f"Skipping deviation {devi_value}: only one choice type observed")
        continue
    group_model = LogisticRegression().fit(X_group, y_group)
    group_prob = group_model.predict_proba(X_group)[:, 1]
    ax.plot(X_group, group_prob, label=f"Visual Stimulus Deviation: {devi_value}")
ax.set_xlabel("Visual Stimulus Difference")
ax.set_ylabel("Probability of Left Choice")
if ax.lines:
    # Build the legend once, and only when at least one curve was drawn.
    ax.legend()
plt.show()

In [None]:
# Add an interaction term between visual_stimulus_devi and visual_stimulus_diff
def interaction_calc(row):
    """Return the signed deviation x raw-difference interaction for one trial.

    `row` must provide 'correct_side', 'visual_stimulus_devi' and
    'visual_stimulus_diff'. The product of the two regressors is multiplied by
    +1 when the correct side is 'left' and by -1 otherwise.

    NOTE: this redefines the earlier function of the same name, which used
    'visual_stimulus_diff_binned'; from here on the name refers to this version.
    """
    side_sign = -1 if row['correct_side'] != 'left' else 1
    product = row['visual_stimulus_devi'] * row['visual_stimulus_diff']
    return product * side_sign
# Evaluate the raw-difference interaction for every trial. This overwrites the
# 'interaction_term' column written by the binned model above.
df_test['interaction_term'] = df_test.apply(interaction_calc, axis=1)

# Prepare the independent variables with the interaction term
X_multi_interaction = df_test[['visual_stimulus_devi', 'visual_stimulus_diff', 'interaction_term']]
X_multi_interaction_const = sm.add_constant(X_multi_interaction)

# Rebuild the response here instead of reusing whatever `y` an earlier cell left
# behind: other cells reassign `y` (e.g. per-group fits), which would give a
# length mismatch with the design matrix.
y = df_test['left_choice_new'].values.astype(int)

# Fit the logistic regression model with the interaction term
logit_model_interaction = sm.Logit(y, X_multi_interaction_const).fit()

# Display the summary, which includes p-values for all regressors
print(logit_model_interaction.summary())

In [None]:
from mpl_toolkits.mplot3d import Axes3D

# Generate a grid of values for visualization
devi_range = np.linspace(df_test['visual_stimulus_devi'].min(), df_test['visual_stimulus_devi'].max(), 50)
diff_range = np.linspace(df_test['visual_stimulus_diff'].min(), df_test['visual_stimulus_diff'].max(), 50)
devi_grid, diff_grid = np.meshgrid(devi_range, diff_range)
# The model was trained on interaction_term = devi * diff * is_left, and devi is
# positive exactly on left trials, so the regressor equals |devi| * diff.
# A plain devi * diff flips the sign of the term wherever devi < 0.
interaction_grid = np.abs(devi_grid) * diff_grid
# Grid corners where devi and diff have opposite signs never occur in the data
# (both carry the sign of the correct side); the surface there is extrapolation.

# Flatten the grid for prediction (column order must match the fitted design matrix)
grid_data = np.column_stack((devi_grid.ravel(), diff_grid.ravel(), interaction_grid.ravel()))
# has_constant='add' always prepends the intercept column, even if one of the
# grid columns happens to be constant.
grid_data_const = sm.add_constant(grid_data, has_constant='add')

# Predict probabilities using the model
probabilities = logit_model_interaction.predict(grid_data_const).reshape(devi_grid.shape)

# Plot the 3D surface
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(devi_grid, diff_grid, probabilities, cmap='viridis', alpha=0.8)
ax.set_xlabel('Visual Stimulus Deviation')
ax.set_ylabel('Visual Stimulus Difference')
ax.set_zlabel('Probability of Left Choice')
ax.set_title('Influence of Regressors on Probability of Left Choice')
fig.colorbar(surf, shrink=0.5, aspect=10)
plt.show()

In [None]:
# Create 2D plots to visualize the influence of each regressor, varying one
# regressor at a time while the other two are held at zero.
def _predict_left_prob(devi, diff, interaction):
    """Predict P(left choice) from logit_model_interaction for the given regressor columns."""
    design = np.column_stack((devi, diff, interaction))
    # has_constant='add' always prepends the intercept, even when a column is constant.
    return logit_model_interaction.predict(sm.add_constant(design, has_constant='add'))


fig, axes = plt.subplots(1, 3, figsize=(18, 5), sharey=True)
zeros_devi = np.zeros_like(devi_range)
zeros_diff = np.zeros_like(diff_range)

# Plot for visual_stimulus_devi
axes[0].plot(devi_range, _predict_left_prob(devi_range, zeros_devi, zeros_devi), color='blue')
axes[0].set_title('Influence of Visual Stimulus Deviation')
axes[0].set_xlabel('Visual Stimulus Deviation')
axes[0].set_ylabel('Probability of Left Choice')

# Plot for visual_stimulus_diff
axes[1].plot(diff_range, _predict_left_prob(zeros_diff, diff_range, zeros_diff), color='green')
axes[1].set_title('Influence of Visual Stimulus Difference')
axes[1].set_xlabel('Visual Stimulus Difference')

# Plot for interaction term
# The varied regressor here is the interaction term itself, so it goes on the
# x axis (it used to be plotted against devi_range and labelled as deviation).
# Its range is taken from |devi| * diff, which is how the term is built from
# the signed regressors.
interaction_span = np.abs(devi_range) * diff_range
interaction_range = np.linspace(interaction_span.min(), interaction_span.max(), interaction_span.size)
axes[2].plot(
    interaction_range,
    _predict_left_prob(np.zeros_like(interaction_range), np.zeros_like(interaction_range), interaction_range),
    color='red',
)
axes[2].set_title('Influence of Interaction Term')
axes[2].set_xlabel('Interaction Term')

plt.tight_layout()
plt.show()

In [None]:
# Observed left-choice fraction (mean with 95% CI) per binned stimulus difference.
sns.pointplot(
    data=df_test,
    x='visual_stimulus_diff_binned',
    y='left_choice_new',
    estimator=np.mean,
    errorbar=("ci", 95),
    native_scale=True,
    color='blue',
    markers='o',
    linestyles='',
)

In [None]:
# Download and load the session table of every animal in the project.
# Loop-local names (`animal`, `animal_path`) are used so the notebook-level
# `mouse` and `local_path` chosen for the single-animal analysis stay intact.
df_dic = {}
project_name = tv_projects[1]
# Fetch the credentials once instead of once per animal.
credentials = utils.get_idibaps_cluster_credentials()
for animal in animals:
    animal_path = Path(utils.get_outpath()) / project_name / "sessions" / animal
    # create the directory if it doesn't exist
    animal_path.mkdir(parents=True, exist_ok=True)
    # download the session data
    utils.rsync_session_data(
        project_name=project_name,
        animal=animal,
        local_path=str(animal_path),
        credentials=credentials,
    )
    # load the data
    df_dic[animal] = pd.read_csv(animal_path / f'{animal}.csv', sep=";")

In [None]:
# Build, for every animal that reached the psychometric stage, the same
# regressors that were computed for the single-animal analysis.
df_dic_hard = {}
for df_name, animal_df in df_dic.items():
    if 'TwoAFC_visual_hard' not in animal_df["current_training_stage"].unique():
        continue
    hard_df = animal_df.dropna(subset=['visual_stimulus'])
    # .copy() so the new columns are written to an independent frame (no
    # SettingWithCopyWarning, and the raw frame in df_dic stays untouched).
    hard_df = hard_df[hard_df["current_training_stage"] == "TwoAFC_visual_hard"].copy()
    if hard_df.empty:
        # Stage present, but no trial with a recorded stimulus: nothing to fit.
        continue

    # Parse each stimulus entry once.
    # NOTE(review): eval() executes the text stored in the downloaded CSV;
    # ast.literal_eval would be safer if entries are plain number pairs -- TODO confirm.
    stim_pairs = hard_df['visual_stimulus'].apply(eval)
    # +1 on trials whose correct side is left, -1 otherwise.
    side_sign = np.where(hard_df['correct_side'] == 'left', 1, -1)

    hard_df['visual_stimulus_devi'] = (
        stim_pairs.apply(lambda pair: abs(round(pair[0] / pair[1], 4))) * side_sign
    )
    hard_df['visual_stimulus_diff'] = (
        stim_pairs.apply(lambda pair: abs(pair[0] - pair[1])) * side_sign
    )
    # Floor into 0.1-wide bins; the quotient is rounded first because plain
    # `// 0.1` mis-bins floats (0.4 // 0.1 gives 3.0, -0.4 // 0.1 gives -4.0).
    hard_df["visual_stimulus_diff_binned"] = np.floor(
        (hard_df['visual_stimulus_diff'] / 0.1).round(6)
    )
    hard_df = dft.add_mouse_first_choice(hard_df)
    # 1 = first choice was left, 0 = anything else.
    hard_df['left_choice_new'] = (hard_df['first_choice'] == 'left').astype(int)

    df_dic_hard[df_name] = hard_df

In [None]:
# Per-animal logistic model: deviation + BINNED difference + their signed interaction.
# The interaction is computed inline from the binned column. Calling the global
# interaction_calc here is wrong: that name is defined twice in this notebook
# and, by this point, refers to the raw-difference version.
pvalues_by_animal = {}
coefs_by_animal = {}
for df_name, animal_df in df_dic_hard.items():
    # +1 on trials whose correct side is left, -1 otherwise.
    side_sign = np.where(animal_df['correct_side'] == 'left', 1, -1)

    # Prepare the independent variables on a private copy, so the shared
    # per-animal frames are not modified by this model.
    X_multi = animal_df[['visual_stimulus_devi', 'visual_stimulus_diff_binned']].copy()
    X_multi['interaction_term'] = (
        X_multi['visual_stimulus_devi'] * X_multi['visual_stimulus_diff_binned'] * side_sign
    )
    X_multi_const = sm.add_constant(X_multi)
    y = animal_df['left_choice_new'].values.astype(int)

    # Fit the logistic regression model with multiple regressors
    logit_model_multi = sm.Logit(y, X_multi_const).fit()

    pvalues_by_animal[df_name] = logit_model_multi.pvalues
    coefs_by_animal[df_name] = logit_model_multi.params

# One column per animal, one row per regressor (including the intercept).
df_devi_diffBin_inter_p = pd.DataFrame(pvalues_by_animal)
df_devi_diffBin_inter_coef = pd.DataFrame(coefs_by_animal)

In [None]:
# Per-animal logistic model: deviation + RAW difference + their signed interaction.
# The interaction is computed inline so the result does not depend on which of
# the two interaction_calc definitions in this notebook was executed last.
pvalues_by_animal = {}
coefs_by_animal = {}
for df_name, animal_df in df_dic_hard.items():
    # +1 on trials whose correct side is left, -1 otherwise.
    side_sign = np.where(animal_df['correct_side'] == 'left', 1, -1)

    # Prepare the independent variables on a private copy, so the shared
    # per-animal frames are not modified by this model.
    X_multi = animal_df[['visual_stimulus_devi', 'visual_stimulus_diff']].copy()
    X_multi['interaction_term'] = (
        X_multi['visual_stimulus_devi'] * X_multi['visual_stimulus_diff'] * side_sign
    )
    X_multi_const = sm.add_constant(X_multi)
    y = animal_df['left_choice_new'].values.astype(int)

    # Fit the logistic regression model with multiple regressors
    logit_model_multi = sm.Logit(y, X_multi_const).fit()

    pvalues_by_animal[df_name] = logit_model_multi.pvalues
    coefs_by_animal[df_name] = logit_model_multi.params

# One column per animal, one row per regressor (including the intercept).
df_devi_diff_inter_p = pd.DataFrame(pvalues_by_animal)
df_devi_diff_inter_coef = pd.DataFrame(coefs_by_animal)

In [None]:
# Give both p-value tables the same short row labels so they share one x axis.
shared_labels = {'visual_stimulus_devi': 'devi', 'interaction_term': 'inter'}
df_devi_diffBin_inter_p.rename(index={**shared_labels, 'visual_stimulus_diff_binned': 'diff'}, inplace=True)
df_devi_diff_inter_p.rename(index={**shared_labels, 'visual_stimulus_diff': 'diff'}, inplace=True)

# One colour per animal: solid line = raw-difference model, dashed = binned model.
animal_colors = sns.color_palette("colorblind", len(df_dic_hard))
for df_name, color in zip(df_dic_hard, animal_colors):
    plt.plot(df_devi_diff_inter_p[df_name], color=color, label=f"{df_name}devi_diff_inter")
    plt.plot(df_devi_diffBin_inter_p[df_name], color=color, linestyle='--', label=f"{df_name}devi_diffBin_inter")

# Conventional significance level as a reference line.
plt.axhline(y=0.05, color='k', linestyle='--', label='p-value threshold')
plt.xlabel("Regressors")
plt.ylabel("p-value")
plt.legend(loc=(1, 0))

In [None]:
# Give both coefficient tables the same short row labels so they share one x axis.
shared_labels = {'visual_stimulus_devi': 'devi', 'interaction_term': 'inter'}
df_devi_diffBin_inter_coef.rename(index={**shared_labels, 'visual_stimulus_diff_binned': 'diff'}, inplace=True)
df_devi_diff_inter_coef.rename(index={**shared_labels, 'visual_stimulus_diff': 'diff'}, inplace=True)

# One colour per animal: solid line = raw-difference model, dashed = binned model.
animal_colors = sns.color_palette("colorblind", len(df_dic_hard))
for df_name, color in zip(df_dic_hard, animal_colors):
    plt.plot(df_devi_diff_inter_coef[df_name], color=color, label=f"{df_name}devi_diff_inter")
    plt.plot(df_devi_diffBin_inter_coef[df_name], color=color, linestyle='--', label=f"{df_name}devi_diffBin_inter")

plt.xlabel("Regressors")
plt.ylabel("Coefficient")
plt.legend(loc=(1, 0))