# combined

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
from scipy import stats
from statsmodels.stats.proportion import proportions_ztest
import os
import matplotlib.font_manager as font_manager
import re
# Switch: when True, only colonies of 120-150 um moving faster than 5
# (speed units as stored in the CSV) are kept for the analysis below.
filter_size_speed = False

# Input folder and the folder the finished figures are written to
base_directory_path = r'C:\Users\Franz\OneDrive\_PhD\My_Papers\Volvox_Uncertainty_Minimization\Data\Figure9_Adapted_Volvox_Photobiases_Data'
save_dir = r'C:\Users\Franz\OneDrive\_PhD\My_Papers\Volvox_Uncertainty_Minimization\Individual_Graphs'

# Read the combined trajectory table (all light conditions)
file_path = (
    base_directory_path
    + r'\final_combined_trajectories_filtered_all_light_conditions.csv'
)
data = pd.read_csv(file_path)

if filter_size_speed:
    # Build the three criteria separately, then keep rows that satisfy all of them
    large_enough = data['size (um)'] >= 120
    small_enough = data['size (um)'] <= 150
    fast_enough = data['speed'] > 5
    data = data[large_enough & small_enough & fast_enough]

# Folder that contains the input CSV
output_dir = os.path.dirname(file_path)

# Font settings shared by the plots: keyword dicts for titles, axis labels
# and tick labels, plus a FontProperties object.
titlefont = dict(fontname='Candara', size=18)
figurefont = dict(fontname='Candara', size=16)
tickfont = dict(fontname='Candara', size=14)
font = font_manager.FontProperties(
    family='Candara',
    size=12,
    math_fontfamily='custom',
)

def filter_frames(group):
    """Return the rows of *group* recorded in well-populated frames.

    A row is kept when its 'particles_in_frame' value is at least 80% of the
    largest 'particles_in_frame' value found in *group*. All columns and the
    original index labels of the kept rows are preserved.
    """
    counts = group['particles_in_frame']
    cutoff = counts.max() * 0.8
    well_populated = counts >= cutoff
    return group[well_populated]

# Within every (chamber, trial, Media) recording keep only the frames that pass
# filter_frames. The groups are filtered one at a time and concatenated instead
# of going through GroupBy.apply: apply's handling of the grouping columns is
# deprecated (pandas >= 2.2), and once they are excluded the following
# reset_index(drop=True) would lose 'chamber', 'trial' and 'Media'.
# Row order matches the old apply result: groups in sorted key order, original
# row order inside each group.
_group_keys = ['chamber', 'trial', 'Media']
_kept_groups = [filter_frames(group) for _, group in data.groupby(_group_keys)]
if _kept_groups:
    data_filtered = pd.concat(_kept_groups, ignore_index=True)
else:
    # No groups at all (empty input): keep an empty frame with the same columns
    data_filtered = data.iloc[0:0].copy()

# Chamber extent along x. This has to be measured on the individual positions,
# BEFORE averaging: the range of the per-recording means is much narrower than
# the chamber and would stretch the percentages so that the lowest mean is 0 %
# and the highest mean is 100 %.
x_min = data_filtered['x'].min()
x_max = data_filtered['x'].max()
chamber_width = x_max - x_min

# Numeric columns to average (everything numeric except the grouping keys)
numeric_columns = data_filtered.select_dtypes(include=[np.number]).columns
numeric_columns = numeric_columns[~numeric_columns.isin(_group_keys)]

# One row of mean values per (chamber, trial, Media) recording
data_averaged = data_filtered.groupby(_group_keys)[numeric_columns].mean().reset_index()

# Re-attach the non-numeric columns, taking their first occurrence per (chamber, Media).
# NOTE(review): the averages are per (chamber, trial, Media) but this lookup is keyed
# on (chamber, Media) only - this assumes the non-numeric columns (e.g. 'Light Condition')
# do not change between trials of the same chamber and Media; confirm against the data.
non_numeric_cols = data_filtered.select_dtypes(exclude=[np.number]).drop_duplicates(subset=['chamber', 'Media'])
data_final = pd.merge(non_numeric_cols, data_averaged, on=['chamber', 'Media'])

# From here on data_filtered holds the averaged table (one row per recording)
data_filtered = data_final.copy()

# Mean x position of each recording as a percentage of the chamber width
data_filtered['avg_x_percent'] = (data_filtered['x'] - x_min) / chamber_width * 100

# Create custom colormaps
cmap_1hz_random = LinearSegmentedColormap.from_list('custom', ['magenta', 'white', 'green'], N=100)
cmap_1hz_2hz = LinearSegmentedColormap.from_list('custom', ['green', 'white', 'lime'], N=100)

def _significance_label(p_value):
    """Return '***', '**' or '*' for p < 0.001, < 0.01, < 0.05, otherwise 'ns'."""
    for threshold, label in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if p_value < threshold:
            return label
    return 'ns'


def _figure_filename(title, filtered):
    """Turn a plot title into a PNG file name, adding '_filtered' when *filtered* is true."""
    stem = title.replace('$\\mathit{', '').replace('}$', '').replace('\n', '')
    stem = stem.replace(' ', '_').replace(':', '').replace('mathit{', '').replace('}$', '')
    suffix = '_filtered' if filtered else ''
    return f"{stem}{suffix}.png"


def plot_position_bar_chart_and_ttest(data, title, cmap, ylabel_right):
    """Plot the mean x position per 'Media' group and test it against the chamber centre.

    One bar is drawn for each 'Media' value ('Normal', 'Continuous', 'Fixed',
    'Random'). Each bar starts at 50 % and reaches the group mean of
    'avg_x_percent'; the error bar is the standard error of the mean. Bars are
    coloured through the same 30-70 % normalisation as the colorbar, so a bar's
    colour can be read off the colorbar.

    Every group with at least two rows is compared with 50 % by a one-sample
    t-test, converted to a one-tailed p-value for "mean greater than 50", and
    annotated with '***', '**', '*' or 'ns' above its bar.

    The figure is saved as a PNG in the module-level ``save_dir`` (the name is
    derived from *title*, with '_filtered' appended when the module-level
    ``filter_size_speed`` is true), shown, and the per-group statistics are
    printed. Fonts come from the module-level ``titlefont``, ``figurefont``
    and ``tickfont`` dicts.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns 'Media' and 'avg_x_percent'.
    title : str
        Plot title; also the basis of the output file name.
    cmap : matplotlib colormap
        Colormap used for the bars and the colorbar.
    ylabel_right : str
        Label of the lowest colorbar tick (30 %).
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    media_order = ['Normal', 'Continuous', 'Fixed', 'Random']
    bar_width = 0.35
    # Single normalisation shared by the bars and the colorbar
    color_norm = plt.Normalize(vmin=30, vmax=70)

    summaries = []
    for x_pos, media in enumerate(media_order):
        values = data.loc[data['Media'] == media, 'avg_x_percent']
        n = len(values)
        mean = values.mean()
        sem = values.sem()
        summaries.append((media, mean, sem, n))

        # Bar from the chamber centre (50 %) to the group mean
        ax.bar(x_pos, mean - 50, yerr=sem, color=cmap(color_norm(mean)),
               edgecolor='black', capsize=10, width=bar_width, bottom=50, label=media)

        # Sample size at the bottom of the axes
        ax.text(x_pos, 30, f'n={n}', ha='center', va='bottom', **figurefont)

        # A t-test needs at least two observations; with fewer, the statistic
        # and the label position (mean + sem) would be NaN.
        if n < 2:
            continue

        t_stat, p_value = stats.ttest_1samp(values, 50)
        # ttest_1samp is two-sided; convert to the one-tailed p for mean > 50
        p_value_one_tailed = p_value / 2 if t_stat > 0 else 1 - (p_value / 2)
        ax.text(x_pos, mean + sem + 2, _significance_label(p_value_one_tailed),
                ha='center', va='bottom', **figurefont)

    # Set labels and title
    ax.set_title(title, **titlefont)
    ax.set_ylabel('Average X Position (% of chamber width)', **figurefont)
    ax.set_ylim(30, 80)
    ax.set_xlim(-0.5, 3.5)
    ax.set_xticks(list(range(len(media_order))))
    ax.set_xticklabels(media_order)
    ax.axhline(y=50, color='black', linestyle='--', linewidth=0.8)

    # Apply font to tick labels
    ax.tick_params(axis='both', which='major', labelsize=tickfont['size'], labelfontfamily=tickfont['fontname'])

    # Colorbar explaining the bar colours
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=color_norm)
    sm.set_array([])
    cbar = plt.colorbar(sm, ax=ax, orientation='vertical', pad=0.1)
    cbar.set_ticks([30, 40, 50, 60, 70])
    cbar.set_ticklabels([ylabel_right, '', 'Center', '', '1 Hz side'])
    cbar.ax.tick_params(labelsize=tickfont['size'], labelfontfamily=tickfont['fontname'])

    plt.tight_layout()

    # Save the figure
    filename = _figure_filename(title, filter_size_speed)
    fig.savefig(os.path.join(save_dir, filename), dpi=300, bbox_inches='tight')

    plt.show()

    # Print statistics
    print(f"\nStatistics for {title}")
    for media, mean, sem, n in summaries:
        print(f"{media} - Average X position: {mean:.2f}%, SEM: {sem:.2f}%, n={n}")

# Draw the figure for one light condition: '1Hz vs Random'

# Math text (the italic species name in the title) uses a custom font set with
# Candara italic; 'mathtext.fontset' accepts
# ['dejavusans', 'dejavuserif', 'cm', 'stix', 'stixsans', 'custom'].
plt.rcParams.update({
    'mathtext.fontset': 'custom',
    'mathtext.it': 'Candara:italic',
})

light_condition = '1Hz vs Random'

# Rows of the averaged table that belong to this light condition
is_this_condition = data_filtered['Light Condition'].eq(light_condition)
condition_data = data_filtered[is_this_condition]

# Human-readable form of the condition name for the title
light_condition_updated = light_condition.replace('1Hz', '1 Hz').replace('vs ', 'vs. ')
title = ''.join((
    '$\\mathit{Volvox}$ $\\mathit{carteri}$ Phototactic Bias ',
    light_condition_updated,
    '\n After Longterm Light Pattern Exposure',
))

cmap = cmap_1hz_random
ylabel_right = 'Random side'
plot_position_bar_chart_and_ttest(condition_data, title, cmap, ylabel_right)