In [None]:
import glob
import json
import os
import pickle
from importlib import reload

import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from matplotlib import pyplot as plt
from scipy import stats
from scipy.stats import shapiro
from scipy.stats import wilcoxon
from statsmodels.stats.multitest import multipletests

import extract_features
import results_functions


# Root folders of the project data. These are machine-specific absolute
# Windows paths; adjust them when running the notebook elsewhere.
# Every backslash is escaped: the original '\B' was an invalid escape
# sequence that only worked by accident and triggers a SyntaxWarning on
# recent Python versions.
json_path_project = 'S:\\AG\\AG-Bewegungsstoerungen-II\\LFP\\PROJECTS\\BATTERY\\'
json_path_onedrive = 'C:\\Users\\mathiopv\\OneDrive - Charité - Universitätsmedizin Berlin\\BATTERY_LIFE\\'

### Make descriptive boxplots for all

In [None]:
directory = os.path.join(json_path_onedrive,
    'results', 'Avg_Features', 'Avg_Features_Tbls' )

dir_saving = os.path.join(json_path_onedrive,
    'results', 'Avg_Features', 'test_results' )
reload(results_functions)
%matplotlib qt
saving = 1
df_fu0m, df_fu3m, df_fu12m = results_functions.get_descriptives(directory, dir_saving, saving)

'''
with open(os.path.join(directory,
    'Means_FU0M.pkl'), "rb") as file:
    val_dat = pickle.load(file)
'''

#### Pairwise Comparisons

In [None]:
# Load the table that stacks all follow-ups and preview its first rows.
all_fus_csv = os.path.join(
    json_path_onedrive, 'results', 'Avg_Features', 'test_results', 'All_FollowUp_dfs.csv'
)
all_fus_df = pd.read_csv(all_fus_csv)

all_fus_df.head()

In [None]:
# Pairwise Wilcoxon signed-rank tests between follow-up time points, run
# separately for every feature column, followed by a Bonferroni correction
# across all comparisons.
all_columns = ['Telemetry_AllMin', 'TelemDurSumSMinRes', 'TelemDurSumMinWard', 'SensDurSumMin']
wilcoxon_results = {}

# List of time points to compare
time_points = ['FU0M', 'FU3M', 'FU12M']

# Every unordered pair of time points, in the order they appear in time_points.
time_point_pairs = [
    (tp1, tp2)
    for pos, tp1 in enumerate(time_points)
    for tp2 in time_points[pos + 1:]
]

for column in all_columns:
    comparisons_results = {}
    for tp1, tp2 in time_point_pairs:
        x1 = all_fus_df.loc[all_fus_df['TimePoint'] == tp1, column]
        x2 = all_fus_df.loc[all_fus_df['TimePoint'] == tp2, column]

        # wilcoxon() pairs the two samples element by element, so both groups
        # must have the same number of rows. Fail early with a message that
        # names the offending comparison.
        # NOTE(review): pairing relies on the rows of each TimePoint being in
        # the same subject order - confirm, or align on a subject identifier.
        if len(x1) != len(x2):
            raise ValueError(
                f"Cannot pair {column}: {len(x1)} rows at {tp1} vs {len(x2)} rows at {tp2}"
            )

        statistic, p_value = wilcoxon(x1, x2)

        comparison_name = f"{column}_{tp1}-{tp2}"
        comparisons_results[comparison_name] = {'Statistic': statistic, 'Original_p-values': p_value}

    wilcoxon_results[column] = comparisons_results

# Flatten to one row per (column, comparison) with the test outputs as columns.
results_df = pd.DataFrame({
    (column, name): result
    for column, comparisons in wilcoxon_results.items()
    for name, result in comparisons.items()
}).T

## Adjust for multiple comparisons
# Slice instead of unpacking the unused outputs into `_`: assigning to `_`
# rebinds IPython's "last output" variable in the notebook namespace.
reject, corrected_p_values = multipletests(results_df['Original_p-values'],
                                           alpha=0.05,
                                           method='bonferroni')[:2]
results_df['Corrected_p-values'] = corrected_p_values

# Significance labels; np.select takes the first matching condition.
# 'Other' is only used when no condition holds (e.g. a NaN p-value).
significance_conds = [
    (results_df['Corrected_p-values'] <= 0.001),
    (results_df['Corrected_p-values'] <= 0.01),
    (results_df['Corrected_p-values'] < 0.05),
    (results_df['Corrected_p-values'] >= 0.05)
]

values = ['***', '**', '*', 'n.s.']

results_df['Significance_multcomp'] = np.select(significance_conds, values, default='Other')
results_df

In [None]:
# Write the pairwise-comparison table next to the other test results.
pairwise_xlsx = os.path.join(
    json_path_onedrive, 'results', 'Avg_Features', 'test_results', 'PairwiseComps.xlsx'
)
results_df.to_excel(pairwise_xlsx)

In [None]:
# Show the rows recorded at FU0M whose Electrode is 'SenSight'.
is_fu0m = all_fus_df['TimePoint'] == 'FU0M'
is_sensight = all_fus_df['Electrode'] == 'SenSight'
all_fus_df[is_fu0m & is_sensight]

### Make correlations with TEED

In [None]:
# Folders used by the correlation analysis; all of them live under <onedrive>/results.
results_root = os.path.join(json_path_onedrive, 'results')

directory_Feat = os.path.join(results_root, 'Avg_Features', 'Avg_Features_Tbls')
directory_TEED = os.path.join(results_root, 'Stim_pars', 'TEED')
directory_corrs = os.path.join(results_root, 'Correlations')

# Forwarded as-is to the results_functions helpers called in the following cells.
saving = 1

In [None]:
reload(results_functions)
%matplotlib qt
corr_df  = results_functions.get_battery_corr_df(directory_Feat, 
                                                directory_TEED, 
                                                directory_corrs, 
                                                saving)



In [None]:
# Display the table returned by results_functions.get_battery_corr_df.
corr_df

In [None]:
# Re-import the helper module, then compute the correlation statistics.
# `saving` and `directory_corrs` are forwarded unchanged to corrs_scatters.
saving = 1
reload(results_functions)
correlation_stats = results_functions.corrs_scatters(
    corr_df,
    saving,
    directory_corrs,
)
# The result is displayed in the next cell.

In [None]:
# Display the statistics returned by results_functions.corrs_scatters.
correlation_stats

In [None]:
# 1. Correct the Spearman correlations for multiple comparisons (Bonferroni).

# correlation_stats is read as a mapping whose values each carry a 'p-value' entry.
corrs_pvalues = [item['p-value'] for item in correlation_stats.values()]

# Keep only the first two outputs by slicing: unpacking the rest into `_`
# would rebind IPython's "last output" variable in the notebook namespace.
# Note that `reject` and `corrected_p_values` replace the variables of the
# same name created by the pairwise Wilcoxon cell.
reject, corrected_p_values = multipletests(corrs_pvalues,
                                           alpha=0.05,
                                           method='bonferroni')[:2]

corrected_p_values

In [None]:
# List the column names available in corr_df.
corr_df.columns

In [None]:
# Multiple linear regression (ordinary least squares, not a mixed-effects
# model) of Battery_12mfu on the usage features and TEED.
# `sm` (statsmodels.api) is imported with the other dependencies at the top
# of the notebook.
predictor_cols = ['Telemetry_AllSec_div', 'SensDurSumSec_div', 'Chronic_12mfu_Days', 'TEED']

# Cast the predictors to float in place: the TEED-only regression and plot
# cells further down read corr_df['TEED'] directly and rely on this cast.
corr_df[predictor_cols] = corr_df[predictor_cols].astype(float)

X = sm.add_constant(corr_df[predictor_cols])  # add a constant term for the intercept
y = corr_df['Battery_12mfu']

model = sm.OLS(y, X).fit()
print(model.summary())

### Linear Regression with TEED

In [None]:
# Simplified model: regress Battery_12mfu on TEED alone (the predictor the
# analysis singles out as significant).

# Dependent variable
y = corr_df['Battery_12mfu']
# Independent variable plus an intercept column
X = sm.add_constant(corr_df['TEED'])

# Ordinary least squares fit; print the full summary table
model2 = sm.OLS(y, X).fit()
print(model2.summary())

In [None]:
# Coefficients / parameter estimates of the TEED-only OLS model (model2).
model2.params
# 'const' is the intercept term introduced by sm.add_constant;
# the remaining entry is the coefficient (slope) for TEED.


In [None]:
# Plot the observed data together with the fitted line of the TEED-only model.
X_teeds = sm.add_constant(corr_df['TEED'])
y_predicted = model2.predict(X_teeds)

# Draw on a dedicated figure: with an interactive backend the implicit
# plt.scatter/plt.plot calls would otherwise add to whichever figure is
# still current from an earlier cell.
fig, ax = plt.subplots()
ax.scatter(corr_df['TEED'], corr_df['Battery_12mfu'], label='Actual Data')
ax.plot(corr_df['TEED'], y_predicted, color='red', label='Regression Line')
ax.set_xlabel('TEED')
ax.set_ylabel('Battery_12mfu')
ax.set_title('Battery_12mfu vs. TEED (OLS fit)')
ax.legend()
plt.show()

#### Test assumptions for linear regression

In [None]:
# Test assumptions of the TEED-only linear regression (model2).

# 1. Linearity: residuals vs. fitted values
residuals = model2.resid
fitted_values = model2.fittedvalues
# Internally studentized (standardized) residuals, so that the scale-location
# panel actually shows what its axis label says; the raw residuals were
# plotted there before.
standardized_residuals = model2.get_influence().resid_studentized_internal

fig, axs = plt.subplots(1, 2)
axs[0].scatter(fitted_values, residuals)
axs[0].set_xlabel('Fitted Values')
axs[0].set_ylabel('Residuals')
axs[0].set_title('Residuals vs. Fitted Values Plot')

# Interpretation: check for a random scatter of points with no discernible pattern.
# A pattern may indicate non-linearity or heteroscedasticity.

# 2. Homoscedasticity: scale-location plot
axs[1].scatter(fitted_values, np.sqrt(np.abs(standardized_residuals)))
axs[1].set_xlabel('Fitted Values')
axs[1].set_ylabel('Square Root of Standardized Residuals')
axs[1].set_title('Scale-Location Plot')
plt.show()
# Interpretation: check for a horizontal band with no clear pattern.
# A funnel-shaped pattern may indicate heteroscedasticity.

In [None]:
# 3. Normality of residuals (uses `residuals` from the assumption-plot cell).
# `shapiro` is imported with the other dependencies at the top of the notebook.

# Q-Q plot. Interpretation: points close to the reference line suggest that
# the residuals are approximately normally distributed.
sm.qqplot(residuals, line='s')

# Shapiro-Wilk test. Interpretation: a small p-value suggests that the
# residuals are not normally distributed.
stat, p_value = shapiro(residuals)

print(f'Shapiro-Wilk Test Statistic: {stat:.4f}, p-value: {p_value:.4f}')