In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import time
import datetime as dt
import os
import seaborn as sns
import scipy.stats as sts

import random
import string

import polars as pl

In [None]:
from nb_vars import FOLDER_INTERMEDIATE, FOLDER_OUTPUT, CUTOFF_SPIKES_HZ

## Load df of filtered peaks

In [None]:
# Filtered peaks for EXPERIMENTO 1, read with polars and converted to pandas.
CONDITION_NAME = "EXPERIMENTO"
CONDITION = "1"

folder_input = f'{FOLDER_INTERMEDIATE}/{CONDITION_NAME} {CONDITION}'

peaks_csv_path = f'{folder_input}/df_peaks_full_{CONDITION_NAME}_{CONDITION}_freq_{CUTOFF_SPIKES_HZ}.csv'
df_peaks_exp_1 = pl.read_csv(peaks_csv_path).to_pandas()


In [None]:
# Filtered peaks for CONTROL 1, read with polars and converted to pandas.
CONDITION_NAME = "CONTROL"
CONDITION = "1"

folder_input = f'{FOLDER_INTERMEDIATE}/{CONDITION_NAME} {CONDITION}'

peaks_csv_path = f'{folder_input}/df_peaks_full_{CONDITION_NAME}_{CONDITION}_freq_{CUTOFF_SPIKES_HZ}.csv'
df_peaks_ctrl_1 = pl.read_csv(peaks_csv_path).to_pandas()

In [None]:
# Filtered peaks for EXPERIMENTO 2, read with polars and converted to pandas.
CONDITION_NAME = "EXPERIMENTO"
CONDITION = "2"

folder_input = f'{FOLDER_INTERMEDIATE}/{CONDITION_NAME} {CONDITION}'

peaks_csv_path = f'{folder_input}/df_peaks_full_{CONDITION_NAME}_{CONDITION}_freq_{CUTOFF_SPIKES_HZ}.csv'
df_peaks_exp_2 = pl.read_csv(peaks_csv_path).to_pandas()


In [None]:
# Filtered peaks for CONTROL 2, read with polars and converted to pandas.
CONDITION_NAME = "CONTROL"
CONDITION = "2"

folder_input = f'{FOLDER_INTERMEDIATE}/{CONDITION_NAME} {CONDITION}'

peaks_csv_path = f'{folder_input}/df_peaks_full_{CONDITION_NAME}_{CONDITION}_freq_{CUTOFF_SPIKES_HZ}.csv'
df_peaks_ctrl_2 = pl.read_csv(peaks_csv_path).to_pandas()

In [None]:
def adapt_MFR_df(df):
    """Add time-ordering columns to an MFR table and sort it chronologically.

    The input frame is left untouched; all changes are made on a copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the string columns 'condition' and 'time_exp'.
        'time_exp' values are expected to be 'Base', '<n> min' or
        '24h + <n> min'.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by ('day', 'min') with these changes:
        - 'day': -1 for 'Base', 1 when the label contains '24h', else 0.
        - 'min': integer minutes parsed from the label (0 for 'Base').
        - 'condition_time': '<condition> | <time>' with ' min' removed and
          'h + ' replaced by '_'.
        - 'time_exp': same labels with ' min' replaced by '´'.

    Raises
    ------
    ValueError
        If a non-'Base' label does not reduce to an integer number of minutes.
    """
    # Copy first so that the caller's frame does not silently gain columns.
    df = df.copy()

    time_labels = df['time_exp']
    df['day'] = [1 if '24h' in label else 0 for label in time_labels]
    df.loc[time_labels == 'Base', 'day'] = -1
    df['min'] = [
        0 if label == 'Base' else int(label.replace('24h + ', '').replace(' min', ''))
        for label in time_labels
    ]

    df = df.sort_values(by=['day', 'min'])

    # Compact label combining condition and time, e.g. 'CONTROL | 24_30'.
    df['condition_time'] = (
        (df['condition'] + ' | ' + df['time_exp'])
        .str.replace(' min', '', regex=False)
        .str.replace('h + ', '_', regex=False)
    )

    # Shorter tick labels for plotting: '30 min' -> '30´'.
    df['time_exp'] = df['time_exp'].str.replace(' min', '´', regex=False)

    return df

## Relative change in MFR (without Baseline normalisation)

### EXPERIMENT 1

In [None]:
# MFR per (condition, time_exp, well, replicate, electrode): number of peaks
# divided by the overall span of the 'time' column in the control-1 table.
peak_counts = df_peaks_ctrl_1.groupby(['condition', 'time_exp', 'well', 'replicate', 'electrode'])['time'].count()
time_span = df_peaks_ctrl_1['time'].max() - df_peaks_ctrl_1['time'].min()

df_MFR_ctrl_1 = (
    (peak_counts / time_span)
    .reset_index()
    .sort_values(by=['condition', 'time_exp', 'well', 'electrode'])
    .rename(columns={'time': 'MFR'})
)
df_MFR_ctrl_1 = adapt_MFR_df(df_MFR_ctrl_1)

# Control-1 MFR at each time point, split by replicate.
fig, ax = plt.subplots(figsize=(28, 6))
sns.boxplot(
    data=df_MFR_ctrl_1,
    x='time_exp',
    y='MFR',
    hue='replicate',
    ax=ax,
)


In [None]:
# Same MFR computation as for the control, applied to the experiment-1 peaks.
peak_counts = df_peaks_exp_1.groupby(['condition', 'time_exp', 'well', 'replicate', 'electrode'])['time'].count()
time_span = df_peaks_exp_1['time'].max() - df_peaks_exp_1['time'].min()

df_MFR_exp_1 = (
    (peak_counts / time_span)
    .reset_index()
    .sort_values(by=['condition', 'time_exp', 'well', 'electrode'])
    .rename(columns={'time': 'MFR'})
)
df_MFR_exp_1 = adapt_MFR_df(df_MFR_exp_1)
df_MFR_exp_1

In [None]:
# Stack control and treated MFR tables of experiment 1 and compare conditions.
df_MFR_ctrl_exp_1 = pd.concat([df_MFR_ctrl_1, df_MFR_exp_1])

fig, ax = plt.subplots(figsize=(28, 6))
sns.boxplot(
    data=df_MFR_ctrl_exp_1,
    x='time_exp',
    y='MFR',
    hue='condition',
    ax=ax,
)
plt.show()


In [None]:
# One pair of figures per treatment of experiment 1: replicates merged, then
# replicates shown separately.
for tto in ['H PLKO', 'H 219', 'LIPOS -', 'LIPOS 219']:
    # Restrict the data to the treatment named in the title; without this
    # filter every figure would show the same, full table.
    df_MFR_exp_1_sub = df_MFR_exp_1[df_MFR_exp_1['condition'] == tto]

    fig, ax = plt.subplots(1,1, figsize=(28,6))
    sns.boxplot(data=df_MFR_exp_1_sub, x='time_exp', y='MFR', ax=ax)
    ax.set_title(tto + ' MERGED REPLICATES')
    plt.show()

    fig, ax = plt.subplots(1,1, figsize=(28,6))
    sns.boxplot(data=df_MFR_exp_1_sub, x='time_exp', y='MFR', ax=ax, hue='replicate')
    ax.set_title(tto + ' INDIVIDUAL REPLICATES')
    plt.show()

### EXPERIMENT 2

In [None]:
# MFR per (condition, time_exp, well, replicate, electrode): number of peaks
# divided by the overall span of the 'time' column in the control-2 table.
peak_counts = df_peaks_ctrl_2.groupby(['condition', 'time_exp', 'well', 'replicate', 'electrode'])['time'].count()
time_span = df_peaks_ctrl_2['time'].max() - df_peaks_ctrl_2['time'].min()

df_MFR_ctrl_2 = (
    (peak_counts / time_span)
    .reset_index()
    .sort_values(by=['condition', 'time_exp', 'well', 'electrode'])
    .rename(columns={'time': 'MFR'})
)
df_MFR_ctrl_2 = adapt_MFR_df(df_MFR_ctrl_2)

# Control-2 MFR at each time point, split by replicate.
fig, ax = plt.subplots(figsize=(28, 6))
sns.boxplot(
    data=df_MFR_ctrl_2,
    x='time_exp',
    y='MFR',
    hue='replicate',
    ax=ax,
)

In [None]:
# Same MFR computation as for the control, applied to the experiment-2 peaks.
peak_counts = df_peaks_exp_2.groupby(['condition', 'time_exp', 'well', 'replicate', 'electrode'])['time'].count()
time_span = df_peaks_exp_2['time'].max() - df_peaks_exp_2['time'].min()

df_MFR_exp_2 = (
    (peak_counts / time_span)
    .reset_index()
    .sort_values(by=['condition', 'time_exp', 'well', 'electrode'])
    .rename(columns={'time': 'MFR'})
)
df_MFR_exp_2 = adapt_MFR_df(df_MFR_exp_2)
df_MFR_exp_2

In [None]:
# Stack control and treated MFR tables of experiment 2 and compare conditions.
df_MFR_ctrl_exp_2 = pd.concat([df_MFR_ctrl_2, df_MFR_exp_2])

fig, ax = plt.subplots(figsize=(28, 6))
sns.boxplot(
    data=df_MFR_ctrl_exp_2,
    x='time_exp',
    y='MFR',
    hue='condition',
    ax=ax,
)
plt.show()

In [None]:
# Control-2 MFR at each time point, split by replicate.
fig, ax = plt.subplots(figsize=(28, 6))
sns.boxplot(
    data=df_MFR_ctrl_2,
    x='time_exp',
    y='MFR',
    hue='replicate',
    ax=ax,
)
plt.show()

In [None]:
# MFR table for experiment 2, pre-sorted by well and electrode only; it is used
# by the per-treatment plots that follow.
peak_counts = df_peaks_exp_2.groupby(['condition', 'time_exp', 'well', 'replicate', 'electrode'])['time'].count()
time_span = df_peaks_exp_2['time'].max() - df_peaks_exp_2['time'].min()

df_MFR_2 = (
    (peak_counts / time_span)
    .reset_index()
    .sort_values(by=['well', 'electrode'])
    .rename(columns={'time': 'MFR'})
)
df_MFR_2 = adapt_MFR_df(df_MFR_2)
df_MFR_2

In [None]:
# One pair of figures per treatment of experiment 2. Rows whose condition is
# 'BASAL' are shown together with the treatment under the tick label 'B'.
for tto in ['GLIO PLKO', 'GLIO 219', 'LIPOS -', 'ASTRO 219']:
    # Explicit copy: the relabelling below must not write into a view of
    # df_MFR_2 (this also avoids pandas' SettingWithCopyWarning).
    df_MFR_2_sub = df_MFR_2[df_MFR_2['condition'].isin(['BASAL', tto])].copy()
    df_MFR_2_sub.loc[df_MFR_2_sub['condition'] == 'BASAL', 'time_exp'] = 'B'

    print(tto)
    fig, ax = plt.subplots(1,1, figsize=(28,6))
    sns.boxplot(data=df_MFR_2_sub, x='time_exp', y='MFR', ax=ax)
    ax.set_title(tto + ' MERGED REPLICATES')
    plt.show()

    fig, ax = plt.subplots(1,1, figsize=(28,6))
    sns.boxplot(data=df_MFR_2_sub, x='time_exp', y='MFR', ax=ax, hue='replicate')
    ax.set_title(tto + ' INDIVIDUAL REPLICATES')
    plt.show()

## Relative change in MFR (with Baseline normalisation)

In [None]:
def normalize_MFR(df, condition):
    """Normalise the MFR of one condition by its per-electrode baseline.

    For every non-baseline row of `condition`, the result is
    sqrt(MFR / MFR_base), where MFR_base is the 'Base' row with the same
    ('well', 'electrode').

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'condition', 'time_exp', 'well', 'electrode', 'MFR'.
    condition : str
        Value of 'condition' to select.

    Returns
    -------
    pandas.DataFrame
        The non-baseline rows of `condition` (original index and order kept)
        with 'MFR' replaced by 'MFR_norm'. Electrodes that have no 'Base' row
        get NaN, so raw firing rates are never mixed in with normalised ratios.

    Raises
    ------
    pandas.errors.MergeError
        If a (well, electrode) pair has more than one 'Base' row.
    """
    df_sub = df[df['condition'] == condition]
    is_base = df_sub['time_exp'] == 'Base'
    base_df = df_sub[is_base]
    rest_df = df_sub[~is_base]

    # A left merge keeps one output row per rest_df row, in rest_df order;
    # `validate` guards against ambiguous (duplicated) baselines.
    baseline = rest_df[['well', 'electrode']].merge(
        base_df[['well', 'electrode', 'MFR']],
        on=['well', 'electrode'],
        how='left',
        validate='many_to_one',
    )['MFR'].to_numpy()

    norm_df = rest_df.rename(columns={'MFR': 'MFR_norm'}).copy()
    # This helps reduce some noise in the data
    norm_df['MFR_norm'] = (rest_df['MFR'].to_numpy() / baseline) ** 0.5

    return norm_df

### EXPERIMENT 1

In [None]:
# Baseline-normalised MFR of the experiment-1 control, split by replicate.
df_MFR_ctrl_1_norm = normalize_MFR(df_MFR_ctrl_1, condition='CONTROL')

fig, ax = plt.subplots(figsize=(28, 6))
sns.boxplot(
    data=df_MFR_ctrl_1_norm,
    x='time_exp',
    y='MFR_norm',
    hue='replicate',
    ax=ax,
)
ax.set_ylim(0, 2.5)

In [None]:
# Normalise each experiment-1 treatment separately, then stack the results
# with the normalised control.
df_MFR_exp_1_norm_list = []
for condition in df_MFR_exp_1['condition'].drop_duplicates().values:
    df_MFR_exp_1_norm_list.append(normalize_MFR(df_MFR_exp_1, condition=condition))
df_MFR_exp_1_norm = pd.concat(df_MFR_exp_1_norm_list)

df_MFR_ctrl_exp_1_norm = pd.concat([df_MFR_ctrl_1_norm, df_MFR_exp_1_norm])

fig, ax = plt.subplots(figsize=(28, 6))
sns.boxplot(
    data=df_MFR_ctrl_exp_1_norm,
    x='time_exp',
    y='MFR_norm',
    hue='condition',
    ax=ax,
)
ax.set_ylim(0, 3)
plt.show()


In [None]:
# One pair of figures per condition of experiment 1 (normalised MFR):
# replicates merged, then replicates shown separately.
for tto in df_MFR_ctrl_exp_1_norm['condition'].drop_duplicates().values:
    # Restrict the data to the condition named in the title; without this
    # filter every figure would show the same, full table.
    df_norm_sub = df_MFR_ctrl_exp_1_norm[df_MFR_ctrl_exp_1_norm['condition'] == tto]

    fig, ax = plt.subplots(1,1, figsize=(28,6))
    sns.boxplot(data=df_norm_sub, x='time_exp', y='MFR_norm', ax=ax)
    ax.set_title(tto + ' MERGED REPLICATES')
    ax.set_ylim([0, 3])
    plt.show()

    fig, ax = plt.subplots(1,1, figsize=(28,6))
    sns.boxplot(data=df_norm_sub, x='time_exp', y='MFR_norm', ax=ax, hue='replicate')
    ax.set_title(tto + ' INDIVIDUAL REPLICATES')
    ax.set_ylim([0, 3])
    plt.show()

### EXPERIMENT 2

In [None]:
# Baseline-normalised MFR of the experiment-2 control, split by replicate.
df_MFR_ctrl_2_norm = normalize_MFR(df_MFR_ctrl_2, condition='CONTROL')

fig, ax = plt.subplots(figsize=(28, 6))
sns.boxplot(
    data=df_MFR_ctrl_2_norm,
    x='time_exp',
    y='MFR_norm',
    hue='replicate',
    ax=ax,
)
ax.set_ylim(0, 2)

In [None]:
# Normalise each experiment-2 treatment separately, then stack the results
# with the normalised control.
df_MFR_exp_2_norm_list = []
for condition in df_MFR_exp_2['condition'].drop_duplicates().values:
    df_MFR_exp_2_norm_list.append(normalize_MFR(df_MFR_exp_2, condition=condition))
df_MFR_exp_2_norm = pd.concat(df_MFR_exp_2_norm_list)

df_MFR_ctrl_exp_2_norm = pd.concat([df_MFR_ctrl_2_norm, df_MFR_exp_2_norm])

fig, ax = plt.subplots(figsize=(28, 6))
sns.boxplot(
    data=df_MFR_ctrl_exp_2_norm,
    x='time_exp',
    y='MFR_norm',
    hue='condition',
    ax=ax,
)
ax.set_ylim(0, 3)
plt.show()

In [None]:
# One pair of figures per condition of experiment 2 (normalised MFR):
# replicates merged, then replicates shown separately.
for tto in df_MFR_ctrl_exp_2_norm['condition'].drop_duplicates().values:
    # Restrict the data to the condition named in the title; without this
    # filter every figure would show the same, full table.
    df_norm_sub = df_MFR_ctrl_exp_2_norm[df_MFR_ctrl_exp_2_norm['condition'] == tto]

    fig, ax = plt.subplots(1,1, figsize=(28,6))
    sns.boxplot(data=df_norm_sub, x='time_exp', y='MFR_norm', ax=ax)
    ax.set_title(tto + ' MERGED REPLICATES')
    plt.show()

    fig, ax = plt.subplots(1,1, figsize=(28,6))
    sns.boxplot(data=df_norm_sub, x='time_exp', y='MFR_norm', ax=ax, hue='replicate')
    ax.set_title(tto + ' INDIVIDUAL REPLICATES')
    plt.show()

## Plotting Exp1 and Exp2 together

In [None]:
# Stack both experiments and tag each row with the experiment it came from.
n_rows_exp_1 = len(df_MFR_ctrl_exp_1_norm)
n_rows_exp_2 = len(df_MFR_ctrl_exp_2_norm)

df_MFR_ctrl_exp_12_norm = pd.concat([df_MFR_ctrl_exp_1_norm, df_MFR_ctrl_exp_2_norm])
df_MFR_ctrl_exp_12_norm['experiment'] = ['1'] * n_rows_exp_1 + ['2'] * n_rows_exp_2

# Label used as hue in the combined plots, e.g. 'CONTROL 1'.
df_MFR_ctrl_exp_12_norm['condition_experiment'] = (
    df_MFR_ctrl_exp_12_norm['condition']
    + ' '
    + df_MFR_ctrl_exp_12_norm['experiment']
)

In [None]:
# Drop the 180´ time point from the combined table.
df_MFR_ctrl_exp_12_norm = df_MFR_ctrl_exp_12_norm.loc[
    df_MFR_ctrl_exp_12_norm['time_exp'] != '180´'
]

In [None]:
# Normalised MFR of every condition/experiment pair at each time point.
fig, ax = plt.subplots(figsize=(35, 6))
sns.boxplot(
    data=df_MFR_ctrl_exp_12_norm,
    x='time_exp',
    y='MFR_norm',
    hue='condition_experiment',
    ax=ax,
)
ax.set_ylim(0, 3)
plt.show()

In [None]:
# Time course of the normalised MFR for the four 219 conditions.
fig, ax = plt.subplots(figsize=(35, 6))
sns.lineplot(
    data=df_MFR_ctrl_exp_12_norm.loc[
        df_MFR_ctrl_exp_12_norm['condition'].isin(['GLIO 219', 'H 219', 'LIPOS 219', 'ASTRO 219'])
    ],
    x='time_exp',
    y='MFR_norm',
    hue='condition_experiment',
    ax=ax,
)
ax.set_ylim(0, 3)
plt.show()

In [None]:
# Time course of the normalised MFR: GLIO 219 vs GLIO PLKO.
fig, ax = plt.subplots(figsize=(35, 6))
sns.lineplot(
    data=df_MFR_ctrl_exp_12_norm.loc[
        df_MFR_ctrl_exp_12_norm['condition'].isin(['GLIO 219', 'GLIO PLKO'])
    ],
    x='time_exp',
    y='MFR_norm',
    hue='condition_experiment',
    ax=ax,
)
ax.set_ylim(0, 3)
plt.show()

In [None]:
# Time course of the normalised MFR: H 219 vs H PLKO.
fig, ax = plt.subplots(figsize=(35, 6))
sns.lineplot(
    data=df_MFR_ctrl_exp_12_norm.loc[
        df_MFR_ctrl_exp_12_norm['condition'].isin(['H 219', 'H PLKO'])
    ],
    x='time_exp',
    y='MFR_norm',
    hue='condition_experiment',
    ax=ax,
)
ax.set_ylim(0, 3)
plt.show()

In [None]:
# Time course of the normalised MFR: LIPOS 219 vs LIPOS -.
fig, ax = plt.subplots(figsize=(35, 6))
sns.lineplot(
    data=df_MFR_ctrl_exp_12_norm.loc[
        df_MFR_ctrl_exp_12_norm['condition'].isin(['LIPOS 219', 'LIPOS -'])
    ],
    x='time_exp',
    y='MFR_norm',
    hue='condition_experiment',
    ax=ax,
)
ax.set_ylim(0, 3)
plt.show()

**Conclusions derived from the exp 1+2 analysis with baseline normalization**

- Exp 1 and Exp 2 are not directly comparable, although both show a decline in signal at the beginning (more pronounced in Exp 1), which tends to level off towards the end and stays at approximately the same level at 24h.
- Relative to their respective experiments, the lipos conditions are the ones that decrease the MFR the most.
- In Exp 1, LIPOS 219 shows a larger decrease than LIPOS -, but H 219 is increased compared to H PLKO.
- In Exp 2, GLIO PLKO and GLIO 219 show a similar effect.

## Regressing plaque 1 and 2 info

We are going to apply a correction factor (a single factor per plaque, applied across all time points) so that the median normalised MFR of the control at time 0´ equals 1 for both plaque 1 and plaque 2.

In [None]:
# Per-experiment correction factors: the MEDIAN (despite the variable names)
# normalised MFR of the control at time 0´.
mean_CTRL_1_MFR = df_MFR_ctrl_exp_12_norm.loc[
    (df_MFR_ctrl_exp_12_norm['condition_experiment'] == 'CONTROL 1')
    & (df_MFR_ctrl_exp_12_norm['time_exp'] == '0´'),
    'MFR_norm',
].median()
mean_CTRL_2_MFR = df_MFR_ctrl_exp_12_norm.loc[
    (df_MFR_ctrl_exp_12_norm['condition_experiment'] == 'CONTROL 2')
    & (df_MFR_ctrl_exp_12_norm['time_exp'] == '0´'),
    'MFR_norm',
].median()

In [None]:
# Rescale each experiment by its own control factor, working on a copy so the
# un-regressed table stays available.
df_MFR_ctrl_exp_12_regress = df_MFR_ctrl_exp_12_norm.copy()

for experiment_id, control_factor in (('1', mean_CTRL_1_MFR), ('2', mean_CTRL_2_MFR)):
    df_MFR_ctrl_exp_12_regress.loc[
        df_MFR_ctrl_exp_12_regress['experiment'] == experiment_id, 'MFR_norm'
    ] /= control_factor

In [None]:
# Regressed MFR of every condition/experiment pair at each time point.
fig, ax = plt.subplots(figsize=(35, 6))
sns.boxplot(
    data=df_MFR_ctrl_exp_12_regress,
    x='time_exp',
    y='MFR_norm',
    hue='condition_experiment',
    ax=ax,
)
ax.set_ylim(0, 3)
plt.show()

In [None]:
# Time course of the regressed MFR for the four 219 conditions.
fig, ax = plt.subplots(figsize=(35, 6))
sns.lineplot(
    data=df_MFR_ctrl_exp_12_regress.loc[
        df_MFR_ctrl_exp_12_regress['condition'].isin(['GLIO 219', 'H 219', 'LIPOS 219', 'ASTRO 219'])
    ],
    x='time_exp',
    y='MFR_norm',
    hue='condition_experiment',
    ax=ax,
)
ax.set_ylim(0, 3)
plt.show()

In [None]:
# Time course of the regressed MFR: GLIO 219 vs GLIO PLKO.
fig, ax = plt.subplots(figsize=(35, 6))
sns.lineplot(
    data=df_MFR_ctrl_exp_12_regress.loc[
        df_MFR_ctrl_exp_12_regress['condition'].isin(['GLIO 219', 'GLIO PLKO'])
    ],
    x='time_exp',
    y='MFR_norm',
    hue='condition_experiment',
    ax=ax,
)
ax.set_ylim(0, 3)
plt.show()

In [None]:
# Time course of the regressed MFR: H 219 vs H PLKO.
fig, ax = plt.subplots(figsize=(35, 6))
sns.lineplot(
    data=df_MFR_ctrl_exp_12_regress.loc[
        df_MFR_ctrl_exp_12_regress['condition'].isin(['H 219', 'H PLKO'])
    ],
    x='time_exp',
    y='MFR_norm',
    hue='condition_experiment',
    ax=ax,
)
ax.set_ylim(0, 3)
plt.show()

In [None]:
# Time course of the regressed MFR: LIPOS 219 vs LIPOS -.
fig, ax = plt.subplots(figsize=(35, 6))
sns.lineplot(
    data=df_MFR_ctrl_exp_12_regress.loc[
        df_MFR_ctrl_exp_12_regress['condition'].isin(['LIPOS 219', 'LIPOS -'])
    ],
    x='time_exp',
    y='MFR_norm',
    hue='condition_experiment',
    ax=ax,
)
ax.set_ylim(0, 3)
plt.show()

# Using LMMs + sigmoid fitting to measure differences between conditions

In this section we are going to compare HEK 219/PLKO and LIPOS 219/- to quantify the differences between both cases. In general, although the linear mixed model reports a statistically significant difference between 219 and -/PLKO, it does not fit the data correctly. Therefore, we are going to fit a sigmoid curve to each replicate, test for statistical differences between the fitted parameters of the two conditions, and see if there is any explainable difference.

The fitted sigmoid is represented as this:

$$ f(t) = \frac{L}{1 + e^{-k (t - t_0)}} + S $$

Where $L$ is the amplitude of the curve (it rises from $S$ to $S + L$), $S$ is the minimum value, $k$ controls the steepness of the curve (the slope at the inflexion point is $Lk/4$) and $t_0$ is the x-location of the inflexion point.

In [None]:
import statsmodels.api as sm
from statsmodels.formula.api import mixedlm
import scipy.stats as sts
from statsmodels.multivariate.manova import MANOVA
from statsmodels.stats.multitest import multipletests
from scipy.optimize import curve_fit


In [None]:
def sigmoid(t, L, k, t0, S):
    """Logistic curve rising from S to S + L, centred at t0 with rate k.

    `t` may be a scalar or a NumPy array; the result has the same shape.
    """
    exponent = -k * (t - t0)
    logistic = L / (1 + np.exp(exponent))
    return logistic + S

In [None]:
# Box constraints passed to the sigmoid fits. They matter: without them the
# fitting algorithm settles on another minimum whose parameters do not
# represent the data coherently.
# Parameter order:       L,   k,     t0,  S
sigmoid_lower_bounds = (0.2, 0.001, 40, 0)
sigmoid_upper_bounds = (1.9, 1.2, 150, 0.5)
sigmoid_bounds = (sigmoid_lower_bounds, sigmoid_upper_bounds)

In [None]:
def plot_sigmoids_condition(data_sub_condition, array_condition):
    """Scatter one condition's data and overlay the fitted sigmoid of each replicate.

    Parameters
    ----------
    data_sub_condition : pandas.DataFrame
        Rows of a single condition, with the columns 'min', 'replicate'
        (numeric) and 'MFR_norm'. The frame is not modified.
    array_condition : numpy.ndarray
        One row of fitted (L, k, t0, S) per replicate. Row i is taken to
        belong to the i-th smallest replicate id, which matches arrays filled
        at index `replicate - 1` for replicate ids 1..n.
    """
    # Work on a copy: the helper column below must not leak into the caller's
    # frame (and must not be written into a slice of a larger frame).
    data_sub_condition = data_sub_condition.copy()

    fig = plt.figure(figsize=(20, 4))
    # Small horizontal offset per replicate so that points do not overlap.
    data_sub_condition['min_scatter_replicate'] = (data_sub_condition['min'] + data_sub_condition['replicate'] * 1 - 2.5)
    sns.scatterplot(data=data_sub_condition, x='min_scatter_replicate', y='MFR_norm', hue='replicate')

    # Evaluate each replicate's fitted curve on an integer grid covering the
    # time range of THIS condition (not of a global frame).
    t_grid = np.arange(data_sub_condition['min'].max())

    x, y, replicates = [], [], []
    for idx, rep in enumerate(np.sort(data_sub_condition['replicate'].unique())):
        L, k, t0, S = array_condition[idx, :]

        x.extend(t_grid)
        replicates.extend([rep] * len(t_grid))
        y.extend(sigmoid(t_grid, L, k, t0, S))

    df_sigmoid = pd.DataFrame({'x': x, 'y': y, 'replicate': replicates})
    sns.lineplot(data=df_sigmoid, x='x', y='y', hue='replicate', legend=False)

## H 219 vs H PLKO (removing outliers)

In [None]:
# Day-0 rows of H 219 and H PLKO, discarding outliers with MFR_norm >= 3.
# The explicit copy makes data_sub an independent frame, so columns can be
# added to it later without a SettingWithCopyWarning.
data_sub = df_MFR_ctrl_exp_12_regress[(df_MFR_ctrl_exp_12_regress['day'] == 0) & (df_MFR_ctrl_exp_12_regress['condition'].isin(['H 219', 'H PLKO']))]
data_sub = data_sub[data_sub['MFR_norm'] < 3].copy()

In [None]:
# Linear mixed model: fixed effects for condition, time (min) and their
# interaction; replicate is the grouping factor.
model = mixedlm(
    "MFR_norm ~ condition * min",
    data_sub,
    groups=data_sub["replicate"],
)
result = model.fit()
print(result.summary())

In [None]:
# Store the model's predictions next to the observations for plotting.
predicted_values = result.predict()
data_sub['predicted'] = predicted_values

In [None]:
# Set the aesthetic style of the plots
sns.set_theme(style="whitegrid")

# One panel per condition
g = sns.FacetGrid(data_sub, col="condition", height=5, aspect=1.5)

# Observed data
g.map_dataframe(sns.scatterplot, x="min", y="MFR_norm", hue="replicate", s=50, legend=False)

# Model predictions. `errorbar=None` replaces the deprecated `ci=None` and
# matches the equivalent LIPOS plot in this notebook.
g.map_dataframe(sns.lineplot, x="min", y="predicted", hue="replicate", style="replicate", legend=False, errorbar=None)

# Axis labels and panel titles
g.set_axis_labels("Time (min)", "MFR_norm")
g.set_titles("Condition: {col_name}")

plt.show()

### Fitting a sigmoid

In [None]:
# Pooled fit (all replicates together) of one sigmoid per condition.
# Setting >=0 or >0 does not alter the S values a lot.
after_start = data_sub['min'] > 0

data_sub_219 = data_sub[(data_sub['condition'] == 'H 219') & after_start]
params_219, _ = curve_fit(
    sigmoid,
    data_sub_219['min'],
    data_sub_219['MFR_norm'],
    maxfev=25000,
    bounds=sigmoid_bounds,
)

data_sub_plko = data_sub[(data_sub['condition'] == 'H PLKO') & after_start]
params_plko, _ = curve_fit(
    sigmoid,
    data_sub_plko['min'],
    data_sub_plko['MFR_norm'],
    maxfev=25000,
    bounds=sigmoid_bounds,
)

fig, ax = plt.subplots(figsize=(35, 6))
sns.boxplot(
    data=data_sub,
    x='time_exp',
    y='MFR_norm',
    hue='condition_experiment',
    ax=ax,
)
ax.set_ylim(0, 3)

# Overlay the fitted curves on the categorical axis. Minutes are divided by 10
# to get box positions -- NOTE(review): this presumably assumes one box every
# 10 minutes starting at 0; confirm against the time points in data_sub.
minutes = np.arange(170)
ax.plot(minutes / 10, sigmoid(minutes, *params_219[:4]))
ax.plot(minutes / 10, sigmoid(minutes, *params_plko[:4]))

plt.show()

In [None]:
# Fit one sigmoid per replicate and test whether the fitted parameters differ
# between H 219 and H PLKO.

data_219 = data_sub[data_sub['condition'] == 'H 219']
data_plko = data_sub[data_sub['condition'] == 'H PLKO']

# Row i of each parameter array belongs to the i-th smallest replicate id.
# For replicate ids 1..n this is the same as indexing with `replicate - 1`,
# but it does not break when ids are not contiguous.
replicates_219 = np.sort(data_219['replicate'].unique())
replicates_plko = np.sort(data_plko['replicate'].unique())

array_params_219 = np.zeros((len(replicates_219), 4))
array_params_plko = np.zeros((len(replicates_plko), 4))

for row, replicate in enumerate(replicates_219):
    # Setting >=0 or >0 does not alter the S values a lot
    data_sub_219_rep = data_219[(data_219['replicate'] == replicate) & (data_219['min'] > 0)]
    params_219, _ = curve_fit(sigmoid, data_sub_219_rep['min'], data_sub_219_rep['MFR_norm'], maxfev=25000, bounds=sigmoid_bounds)
    array_params_219[row, :] = params_219

for row, replicate in enumerate(replicates_plko):
    data_sub_plko_rep = data_plko[(data_plko['replicate'] == replicate) & (data_plko['min'] > 0)]
    params_plko, _ = curve_fit(sigmoid, data_sub_plko_rep['min'], data_sub_plko_rep['MFR_norm'], maxfev=25000, bounds=sigmoid_bounds)
    array_params_plko[row, :] = params_plko


# MANOVA on the fitted (L, k, t0, S): rows are replicates, label 0 = H 219,
# label 1 = H PLKO.
params = np.vstack((array_params_219, array_params_plko))
labels = [0] * len(array_params_219) + [1] * len(array_params_plko)

# MANOVA does not add an intercept by itself. Without the constant column the
# model would force the mean of group 0 to zero, and the test would not be a
# comparison between groups. With it, the between-group effect is the test
# reported for the non-constant (label) column. Values printed here can differ
# from results obtained without the intercept.
exog = sm.add_constant(np.asarray(labels, dtype=float))

maov = MANOVA(endog=params, exog=exog)
print(maov.mv_test())

In [None]:
param_names = ["L", "k", "t0", "S"]
p_values = []

# Independent two-sample t-test on each fitted parameter (219 vs PLKO)
for idx, parameter in enumerate(param_names):
    values_219 = array_params_219[:, idx]
    values_plko = array_params_plko[:, idx]
    t_stat, p_val = sts.ttest_ind(values_219, values_plko)
    print(f"T-test for {parameter}: t-stat={t_stat}, p-value={p_val}")
    p_values.append(p_val)

# Adjust the p-values for multiple comparisons. Note that the method used is
# 'fdr_twostage' (a two-stage FDR procedure), not Bonferroni.
correction = multipletests(p_values, method='fdr_twostage')
corrected_p_values = correction[1]
print('\n')
# Display the corrected p-values
for param, corrected_p_val in zip(param_names, corrected_p_values):
    print(f"Corrected p-value for {param}: {corrected_p_val}")

In [None]:
# Long table of fitted parameters: one row per (group, replicate).
data = pd.DataFrame(np.vstack((array_params_219, array_params_plko)), columns=param_names)
data['Group'] = ['219'] * len(array_params_219) + ['-'] * len(array_params_plko)

# The rows of the parameter arrays are ordered by replicate id, so the labels
# must be sorted too. `unique()` alone returns ids in order of appearance,
# which can attach a replicate label to another replicate's parameters.
replicates_219 = np.sort(data_sub.loc[data_sub['condition'] == 'H 219', 'replicate'].unique()).tolist()
replicates_plko = np.sort(data_sub.loc[data_sub['condition'] == 'H PLKO', 'replicate'].unique()).tolist()
data['replicate'] = replicates_219 + replicates_plko

# Set up the plotting environment
sns.set_theme(style="whitegrid")

# One panel per fitted parameter
fig, axes = plt.subplots(1, len(param_names), figsize=(20, 5), sharey=False)

for i, param in enumerate(param_names):
    sns.swarmplot(x='Group', y=param, data=data, hue='replicate', ax=axes[i], size=7)
    axes[i].set_title(f'Distribution of {param}')
    axes[i].set_xlabel('Group')
    axes[i].set_ylabel(param)

plt.tight_layout()
plt.show()


In [None]:
# Per-replicate sigmoid fits over the data, one figure per condition.
for condition_label, condition_params in (('H 219', array_params_219), ('H PLKO', array_params_plko)):
    data_sub_condition = data_sub[data_sub['condition'] == condition_label]
    array_condition = condition_params
    plot_sigmoids_condition(data_sub_condition, array_condition)

We see that there is a combined difference in the curves by using a MANOVA analysis (P < 0.0238), but when looking at the individual adjusted p-values, none of them reports a clear difference. The most "different" value is $S$, which is smaller in PLKO, indicating that the starting/basal MFR is lower. Among the remaining parameters, $t_0$ also looks promising but is not statistically significant, because one 219 replicate has a very high $t_0$ value (and, conversely, one PLKO replicate has a very low one). If we did not consider these replicates, it would imply that 219 has an inflexion point much earlier than PLKO.

## LIPOS 219 vs LIPOS - (removing outliers)

In [None]:
# Day-0 rows of LIPOS 219 and LIPOS - from experiment 1, discarding outliers
# with MFR_norm >= 3. The explicit copy makes data_sub an independent frame,
# so columns can be added to it later without a SettingWithCopyWarning.
data_sub = df_MFR_ctrl_exp_12_regress[(df_MFR_ctrl_exp_12_regress['day'] == 0) & (df_MFR_ctrl_exp_12_regress['experiment'] == '1') & (df_MFR_ctrl_exp_12_regress['condition'].isin(['LIPOS 219', 'LIPOS -']))]
data_sub = data_sub[data_sub['MFR_norm'] < 3].copy()

In [None]:
# Linear mixed model: fixed effects for condition, time (min) and their
# interaction; replicate is the grouping factor.
model = mixedlm(
    "MFR_norm ~ condition * min",
    data_sub,
    groups=data_sub["replicate"],
)
result = model.fit()
print(result.summary())

In [None]:
# Store the model's predictions next to the observations for plotting.
predicted_values = result.predict()
data_sub['predicted'] = predicted_values

In [None]:
# Plot style
sns.set_theme(style="whitegrid")

# One panel per condition
g = sns.FacetGrid(data_sub, col="condition", height=5, aspect=1.5)

# Observed data
g.map_dataframe(
    sns.scatterplot,
    x="min",
    y="MFR_norm",
    hue="replicate",
    s=50,
    legend=False,
)

# Mixed-model predictions, one line per replicate
g.map_dataframe(
    sns.lineplot,
    x="min",
    y="predicted",
    hue="replicate",
    style="replicate",
    legend=False,
    errorbar=None,
)

g.set_axis_labels("Time (min)", "MFR_norm")
g.set_titles("Condition: {col_name}")

plt.show()

### Fitting a sigmoid

In [None]:
# Pooled fit (all replicates together) of one sigmoid per condition.
# Setting >=0 or >0 does not alter the S values a lot.
after_start = data_sub['min'] > 0

data_sub_0 = data_sub[(data_sub['condition'] == 'LIPOS -') & after_start]
params_0, _ = curve_fit(
    sigmoid,
    data_sub_0['min'],
    data_sub_0['MFR_norm'],
    maxfev=25000,
    bounds=sigmoid_bounds,
)

data_sub_219 = data_sub[(data_sub['condition'] == 'LIPOS 219') & after_start]
params_219, _ = curve_fit(
    sigmoid,
    data_sub_219['min'],
    data_sub_219['MFR_norm'],
    maxfev=25000,
    bounds=sigmoid_bounds,
)

fig, ax = plt.subplots(figsize=(35, 6))
sns.boxplot(
    data=data_sub,
    x='time_exp',
    y='MFR_norm',
    hue='condition_experiment',
    ax=ax,
)
ax.set_ylim(0, 3)

# Overlay the fitted curves on the categorical axis. Minutes are divided by 10
# to get box positions -- NOTE(review): this presumably assumes one box every
# 10 minutes starting at 0; confirm against the time points in data_sub.
minutes = np.arange(170)
ax.plot(minutes / 10, sigmoid(minutes, *params_0[:4]))
ax.plot(minutes / 10, sigmoid(minutes, *params_219[:4]))

plt.show()

In [None]:
# Fit one sigmoid per replicate and test whether the fitted parameters differ
# between LIPOS 219 and LIPOS -.

data_219 = data_sub[data_sub['condition'] == 'LIPOS 219']
data_0 = data_sub[data_sub['condition'] == 'LIPOS -']

# Row i of each parameter array belongs to the i-th smallest replicate id.
# For replicate ids 1..n this is the same as indexing with `replicate - 1`,
# but it does not break when ids are not contiguous.
replicates_219 = np.sort(data_219['replicate'].unique())
replicates_0 = np.sort(data_0['replicate'].unique())

array_params_219 = np.zeros((len(replicates_219), 4))
array_params_0 = np.zeros((len(replicates_0), 4))

for row, replicate in enumerate(replicates_219):
    # Setting >=0 or >0 does not alter the S values a lot
    data_sub_219_rep = data_219[(data_219['replicate'] == replicate) & (data_219['min'] > 0)]
    params_219, _ = curve_fit(sigmoid, data_sub_219_rep['min'], data_sub_219_rep['MFR_norm'], maxfev=25000, bounds=sigmoid_bounds)
    array_params_219[row, :] = params_219

for row, replicate in enumerate(replicates_0):
    data_sub_0_rep = data_0[(data_0['replicate'] == replicate) & (data_0['min'] > 0)]
    params_0, _ = curve_fit(sigmoid, data_sub_0_rep['min'], data_sub_0_rep['MFR_norm'], maxfev=25000, bounds=sigmoid_bounds)
    array_params_0[row, :] = params_0


# MANOVA on the fitted (L, k, t0, S): rows are replicates, label 0 = LIPOS 219,
# label 1 = LIPOS -.
params = np.vstack((array_params_219, array_params_0))
labels = [0] * len(array_params_219) + [1] * len(array_params_0)

# MANOVA does not add an intercept by itself. Without the constant column the
# model would force the mean of group 0 to zero, and the test would not be a
# comparison between groups. With it, the between-group effect is the test
# reported for the non-constant (label) column. Values printed here can differ
# from results obtained without the intercept.
exog = sm.add_constant(np.asarray(labels, dtype=float))

maov = MANOVA(endog=params, exog=exog)
print(maov.mv_test())

In [None]:
param_names = ["L", "k", "t0", "S"]
p_values = []

# Independent two-sample t-test on each fitted parameter (219 vs -)
for idx, parameter in enumerate(param_names):
    values_219 = array_params_219[:, idx]
    values_0 = array_params_0[:, idx]
    t_stat, p_val = sts.ttest_ind(values_219, values_0)
    print(f"T-test for {parameter}: t-stat={t_stat}, p-value={p_val}")
    p_values.append(p_val)

# Adjust the p-values for multiple comparisons. Note that the method used is
# 'fdr_twostage' (a two-stage FDR procedure), not Bonferroni.
correction = multipletests(p_values, method='fdr_twostage')
corrected_p_values = correction[1]
print('\n')
# Display the corrected p-values
for param, corrected_p_val in zip(param_names, corrected_p_values):
    print(f"Corrected p-value for {param}: {corrected_p_val}")

In [None]:
# Long table of fitted parameters: one row per (group, replicate).
data = pd.DataFrame(np.vstack((array_params_219, array_params_0)), columns=param_names)
data['Group'] = ['219'] * len(array_params_219) + ['-'] * len(array_params_0)

# The rows of the parameter arrays are ordered by replicate id, so the labels
# must be sorted too. `unique()` alone returns ids in order of appearance,
# which can attach a replicate label to another replicate's parameters.
replicates_219 = np.sort(data_sub.loc[data_sub['condition'] == 'LIPOS 219', 'replicate'].unique()).tolist()
replicates_0 = np.sort(data_sub.loc[data_sub['condition'] == 'LIPOS -', 'replicate'].unique()).tolist()
data['replicate'] = replicates_219 + replicates_0

# Set up the plotting environment
sns.set_theme(style="whitegrid")

# One panel per fitted parameter
fig, axes = plt.subplots(1, len(param_names), figsize=(20, 5), sharey=False)

for i, param in enumerate(param_names):
    sns.swarmplot(x='Group', y=param, data=data, hue='replicate', ax=axes[i], size=7)
    axes[i].set_title(f'Distribution of {param}')
    axes[i].set_xlabel('Group')
    axes[i].set_ylabel(param)

plt.tight_layout()
plt.show()

In [None]:
# Per-replicate sigmoid fits over the data, one figure per condition.
for condition_label, condition_params in (('LIPOS 219', array_params_219), ('LIPOS -', array_params_0)):
    data_sub_condition = data_sub[data_sub['condition'] == condition_label]
    array_condition = condition_params
    plot_sigmoids_condition(data_sub_condition, array_condition)

We see that there is a combined difference in the curves by using a MANOVA analysis (P < 0.0177), but when looking at the individual adjusted p-values, none of them reports a clear difference. The most "different" value is $t_0$, with a p-value of ~0.01 (adjusted to ~0.05), which shows that $t_0$ is lower in the control (LIPOS -) than in the 219 sample. This is in contrast with the $t_0$ differences observed in the HEK sample.