# with stats stars in graph

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
from scipy import stats
from statsmodels.stats.proportion import proportions_ztest
import os
import matplotlib.font_manager as font_manager

# Switch for the size/speed filter (the trial filter below is always applied)
filter_size_speed = True

# Input trajectories (CSV) and the folder the figures are saved to
file_path = r'C:\Users\Franz\OneDrive\_PhD\My_Papers\Volvox_Uncertainty_Minimization\Data\Figure3_and_4_Volvox_Carteri\final_combined_trajectories_filtered_all_light_conditions.csv'
save_dir = r'C:\Users\Franz\OneDrive\_PhD\My_Papers\Volvox_Uncertainty_Minimization\Individual_Graphs'

data = pd.read_csv(file_path)

# Row selection:
#   * always: trial <= 5
#   * with filter_size_speed on: additionally 120 <= size (um) <= 150
#     and 1 < speed < 3 (speed in whatever unit the CSV stores)
keep_rows = data['trial'] <= 5
if filter_size_speed:
    size_ok = (data['size (um)'] >= 120) & (data['size (um)'] <= 150)
    speed_ok = (data['speed'] > 1) & (data['speed'] < 3)
    keep_rows = size_ok & speed_ok & keep_rows
data = data[keep_rows]

# Folder that contains the input file
output_dir = os.path.dirname(file_path)

# Font settings shared by the plots (title / axis labels / tick labels)
titlefont = dict(fontname='Candara', size=18)
figurefont = dict(fontname='Candara', size=16)
tickfont = dict(fontname='Candara', size=14)
font = font_manager.FontProperties(family='Candara', math_fontfamily='custom', size=12)

# Function to filter frames based on 80% of max particles
def filter_frames(group, fraction=0.8):
    """Keep the rows of *group* that come from well-populated frames.

    A row is kept when its ``particles_in_frame`` value is at least
    ``fraction`` times the largest ``particles_in_frame`` value found in
    *group*.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to filter; must contain a ``particles_in_frame`` column.
    fraction : float, optional
        Share of the group's maximum particle count a row must reach to be
        kept. Defaults to 0.8, the value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        The subset of *group* that passes the threshold (original index kept).
    """
    particle_counts = group['particles_in_frame']
    threshold = fraction * particle_counts.max()
    return group[particle_counts >= threshold]

# Drop sparsely populated frames inside every (chamber, trial, chamber_side) group
group_keys = ['chamber', 'trial', 'chamber_side']
data_filtered = (
    data.groupby(group_keys)
    .apply(filter_frames)
    .reset_index(drop=True)
)

# Columns to average: every numeric column except the grouping keys
numeric_columns = data_filtered.select_dtypes(include=[np.number]).columns
numeric_columns = numeric_columns[~numeric_columns.isin(group_keys)]

# One row of column means per (chamber, trial, chamber_side) group
data_averaged = (
    data_filtered.groupby(group_keys)[numeric_columns]
    .mean()
    .reset_index()
)

# Re-attach the non-numeric columns, taking the first row seen for each
# (chamber, chamber_side) pair and joining on that pair
side_keys = ['chamber', 'chamber_side']
non_numeric_cols = (
    data_filtered.select_dtypes(exclude=[np.number])
    .drop_duplicates(subset=side_keys)
)
data_final = pd.merge(non_numeric_cols, data_averaged, on=side_keys)

# From here on data_filtered holds the per-group averages, not individual rows
data_filtered = data_final.copy()

# Chamber width taken as the range of the x column.
# NOTE(review): at this point 'x' is the per-group mean, so the range is that
# of the averaged positions rather than of the raw trajectories - confirm
# this is the intended definition of the chamber width.
x_values = data_filtered['x']
x_min, x_max = x_values.min(), x_values.max()
chamber_width = x_max - x_min

# Position of each averaged row as a percentage of the chamber width
data_filtered['avg_x_percent'] = x_values.sub(x_min).div(chamber_width).mul(100)

# Three-colour maps used for the bar and the colorbar of each comparison
cmap_1hz_random = LinearSegmentedColormap.from_list(
    'custom', ['magenta', 'white', 'green'], N=100)
cmap_1hz_2hz = LinearSegmentedColormap.from_list(
    'custom', ['green', 'white', 'lime'], N=100)

def _significance_stars(p_value):
    """Return '***', '**', '*' or 'ns' for p < 0.001, < 0.01, < 0.05, otherwise."""
    for cutoff, stars in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if p_value < cutoff:
            return stars
    return 'ns'


def _figure_filename(title, filtered):
    """Turn a plot title into a PNG file name, tagged '_filtered' when *filtered*."""
    name = title
    # Strip the mathtext italics markup and newlines, then make the rest
    # file-system friendly. The substitutions are applied in this order.
    for old, new in (('$\\mathit{', ''), ('}$', ''), ('\n', ''),
                     (' ', '_'), (':', ''), ('mathit{', ''), ('}$', '')):
        name = name.replace(old, new)
    return f"{name}_filtered.png" if filtered else f"{name}.png"


def plot_position_bar_chart_and_ttest(data, title, cmap, ylabel_right,xlabel,save_dir):
    """Plot the mean x position as a bar around the 50 % line and test it against 50.

    Draws a single bar from 50 to the mean of ``data['avg_x_percent']`` with
    the standard error as error bar, a colorbar spanning 30-70, the sample
    size and a significance label; saves the figure as a PNG in *save_dir*,
    shows it, and prints the statistics.

    The test is a one-sample t-test of ``avg_x_percent`` against 50 with the
    one-sided alternative "mean > 50". Cohen's d is (mean - 50) / std.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``avg_x_percent`` column. If it has no rows, a message
        is printed and nothing is plotted or saved.
    title : str
        Figure title; also the basis of the output file name.
    cmap : matplotlib colormap
        Colours the bar and the colorbar.
    ylabel_right : str
        Label for the lowest colorbar tick (30). Ignored when *xlabel* is
        '1Hz vs 2Hz', which uses fixed '1 Hz side' / '2 Hz side' labels.
    xlabel : str
        Comparison name; only used to choose the colorbar labels.
    save_dir : str
        Folder the PNG is written to.

    Notes
    -----
    Reads the module-level ``titlefont``, ``figurefont``, ``tickfont`` and
    ``filter_size_speed`` (the latter adds '_filtered' to the file name).
    """
    if data.empty:
        # Nothing to plot or test; avoid saving an all-NaN figure.
        print(f"\nNo data for {title}; skipping plot and t-test.")
        return

    positions = data['avg_x_percent']
    overall_mean = positions.mean()
    overall_sem = positions.sem()
    print('overall mean = ',overall_mean,'overall standard error of the mean = ',overall_sem)

    # One-sample t-test against the chamber centre, one-sided (mean > 50)
    t_stat, p_value_one_tailed = stats.ttest_1samp(positions, 50, alternative='greater')
    cohens_d = (overall_mean - 50) / positions.std()
    significance = _significance_stars(p_value_one_tailed)

    fig, ax = plt.subplots(figsize=(4, 6))

    # The y axis and the colorbar share one range, and the bar colour is looked
    # up on that same scale so that it agrees with the colorbar.
    y_min, y_max = 30, 70
    color = cmap((overall_mean - y_min) / (y_max - y_min))

    # Bar drawn from the centre line (50) to the mean
    bar_height = overall_mean - 50  # Deviation from center
    ax.bar(0, bar_height, yerr=overall_sem, color=color, edgecolor='black',
           capsize=10, width=0.3, bottom=50)

    ax.set_title(title, **titlefont)
    ax.set_ylabel('Average X Position (% of chamber width)', **figurefont)
    ax.set_ylim(y_min, y_max)
    ax.set_xlim(-0.5, 0.5)
    ax.set_xticks([])
    ax.axhline(y=50, color='black', linestyle='--', linewidth=0.8)

    # Apply font to tick labels
    ax.tick_params(axis='both', which='major', labelsize=tickfont['size'],
                   labelfontfamily=tickfont['fontname'])

    # Colorbar over the same range as the y axis
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=y_min, vmax=y_max))
    sm.set_array([])

    # Shrink the main axes slightly to leave room for the colorbar
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.9, box.height])

    cbar = fig.colorbar(sm, ax=ax, orientation='vertical', pad=0.1)
    cbar.set_ticks([30, 40, 50, 60, 70])
    if xlabel=='1Hz vs 2Hz':
        cbar.set_ticklabels(['1 Hz side', '', 'Center', '', '2 Hz side'])
    else:
        cbar.set_ticklabels([ylabel_right, '', 'Center', '', '1 Hz side'])
    cbar.ax.tick_params(labelsize=figurefont['size'],
                        labelfontfamily=figurefont['fontname'])

    # Sample size in the lower-right corner of the axes
    sample_size = len(data)
    ax.text(0.95, 0.05, f'n={sample_size}', transform=ax.transAxes, **figurefont,
            verticalalignment='bottom', horizontalalignment='right')

    # Significance label, 2 units above the upper end of the error bar
    ax.text(0, overall_mean + overall_sem + 2, significance,
            ha='center', va='bottom', **figurefont)

    plt.tight_layout()

    # Save the figure
    filename = _figure_filename(title, filter_size_speed)
    fig.savefig(os.path.join(save_dir, filename), dpi=300, bbox_inches='tight')

    # Show the plot
    plt.show()

    verdicts = {
        '***': "The result is extremely significant (p < 0.001) ***",
        '**': "The result is very significant (p < 0.01) **",
        '*': "The result is significant (p < 0.05) *",
        'ns': "The result is not statistically significant (p >= 0.05)",
    }

    print(f"\nStatistics for {title}")
    print(f"Overall average X position (% of chamber width): {overall_mean:.2f}%")
    print(f"Deviation from center: {bar_height:.2f}%")
    print(f"Standard error: {overall_sem:.2f}%")
    print(f"Number of particles: {sample_size}")
    print(f"One-tailed t-test results:")
    print(f"t-statistic: {t_stat:.4f}")
    print(f"p-value (one-tailed): {p_value_one_tailed:.6f}")
    print(verdicts[significance])
    print(f"Cohen's d: {cohens_d:.4f}")

# Register the Candara font file with Matplotlib's font manager
font_manager.fontManager.addfont(r'C:\Users\Franz\OneDrive\_PhD\Code\python_packages\candara-font-family\Candara.ttf')

# Use Candara italic for \mathit{...} text in titles
plt.rcParams['mathtext.fontset'] = 'custom' # supported values are ['dejavusans', 'dejavuserif', 'cm', 'stix', 'stixsans', 'custom']
plt.rcParams['mathtext.it'] = 'Candara:italic'

# Plot and perform tests for '1Hz vs Random'
# (backslashes are doubled because these strings also contain a real '\n')
xlabel='1Hz vs Random'
title='$\\mathit{Volvox}$ $\\mathit{carteri}$ Phototactic Bias\n 1 Hz vs. Random'
data_1hz_random = data_filtered[data_filtered['Light Condition'] == xlabel]
plot_position_bar_chart_and_ttest(data_1hz_random, title, cmap_1hz_random, 'Random side',xlabel,save_dir)

# Plot and perform tests for '1Hz vs 2Hz'
xlabel='1Hz vs 2Hz'
title='$\\mathit{Volvox}$ $\\mathit{carteri}$ Phototactic Bias\n 2 Hz vs. 1 Hz'
# Work on an explicit copy so the flip below neither warns nor depends on
# whether the selection is a view of data_filtered.
data_1hz_2hz = data_filtered[data_filtered['Light Condition'] == xlabel].copy()
## Flip 1 and 2 Hz for consistency:
data_1hz_2hz['avg_x_percent'] = 100 - data_1hz_2hz['avg_x_percent']
plot_position_bar_chart_and_ttest(data_1hz_2hz, title, cmap_1hz_2hz, '2Hz side',xlabel,save_dir)

# Print overall statistics.
# data_filtered holds per-group averages by now, so the number of rows that
# survive the frame filter is recomputed here from the unaveraged data.
rows_before = len(data)
rows_after = len(
    data.groupby(['chamber', 'trial', 'chamber_side']).apply(filter_frames)
)
rows_removed = rows_before - rows_after
# Guard the percentage against an empty input table
percent_removed = (rows_removed / rows_before * 100) if rows_before else float('nan')

print("\nOverall Statistics")
print(f"Total frames before filtering: {rows_before}")
print(f"Total frames after filtering: {rows_after}")
print(f"Frames removed: {rows_removed}")
print(f"Percentage of frames removed: {percent_removed:.2f}%")
print(f"Averaged chamber/trial/side rows analysed: {len(data_filtered)}")
print(f"Calculated chamber width: {chamber_width:.2f}")