In [None]:
import os
import re  # to search

import matplotlib, matplotlib.pyplot as plt
import numpy, seaborn, pandas
import scipy.stats as stats
from scipy.stats import gaussian_kde

In [None]:
# Inject a CSS rule that sets elements of class `container` to 90% width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width: 90% !important; }</style>"))

In [None]:
# Project root folder. The default is the original local folder; set the
# MHI_PROJECT_DIR environment variable to run the notebook from another
# location without editing this cell.
file_directory = os.environ.get(
    "MHI_PROJECT_DIR",
    "/Users/kja11/OneDrive - Menntaský (1)/Mild_Hypothermia_project/in_silico/Python/",
)
# File paths are built by plain string concatenation, so the root has to end
# with a separator.
if not file_directory.endswith("/"):
    file_directory += "/"

# Sub-folders, relative to file_directory, for the flow-cytometry input and output.
input_path = "1) input/flow_cytometry/"
output_path = "3) output/flow_cytometry/"

# Data loading and modification

## Metadata

In [None]:
# Metadata table, important for sample_name
meta_csv = file_directory + input_path + 'sumoylation_exp2_analyze_table.csv'

# Only two columns are given short names: the column pandas reads as
# 'Unnamed: 0' (renamed to sample_name) and the geometric mean of mCherry-A
# (renamed to gMFI).
short_names = {
    'Unnamed: 0': 'sample_name',
    'HEK293/Single Cells | Geometric Mean (FL3-A :: mCherry-A)': 'gMFI',
}

df_meta = pandas.read_csv(meta_csv, sep=',').rename(columns=short_names)
print(df_meta.shape, '\n')
df_meta.tail(5)

In [None]:
# Remove the 'Mean' and 'SD' summary rows so that only sample rows remain.
# .copy() makes df_meta an independent frame: later cells add columns to it,
# and doing that on a filtered selection triggers pandas' SettingWithCopyWarning.
is_summary_row = df_meta['sample_name'].isin(['Mean', 'SD'])
df_meta = df_meta[~is_summary_row].copy()
df_meta.tail()

In [None]:
def _first_match(pattern, text, flags=0, lower=False):
    """Return the text of the first match of `pattern` in `text`, or None.

    The regex is searched exactly once. When `lower` is true the matched text
    is lower-cased before it is returned.
    """
    match = re.search(pattern, text, flags)
    if match is None:
        return None
    found = match.group()
    return found.lower() if lower else found


# Have to rename the samples to have clear labelling
sample_name = df_meta.sample_name

# Select the three pieces of meta information from the sample name.
# A name without a match gets None in the corresponding column.

# Temperature: '32' or '37' as a standalone token (kept as a string)
df_meta['Temperature'] = sample_name.apply(
    lambda name: _first_match(r'\b(32|37)\b', name))

# Target: matched case-insensitively and stored lower-case
df_meta['mhi-target'] = sample_name.apply(
    lambda name: _first_match(r'(rbm3|sp1|cirbp)', name, re.IGNORECASE, lower=True))

# Condition: matched case-insensitively, stored with the casing found in the name
df_meta['condition'] = sample_name.apply(
    lambda name: _first_match(r'(Unstained|ctl|sumo1a|sumo1b|sumo2a|sumo2b)', name, re.IGNORECASE))
df_meta

In [None]:
# Give the unstained control explicit target / temperature values so that no
# None is left in the columns used to build the label.
df_meta.loc[df_meta['condition'] == 'Unstained', ['mhi-target', 'Temperature']] = ['HEK293', 'control']

# Label layout: '<mhi-target>_<condition>_<Temperature>'
label_serie = df_meta[['mhi-target', 'condition', 'Temperature']].agg('_'.join, axis=1)

# DataFrame.insert raises when the column already exists; dropping a previous
# 'label' column first keeps this cell re-runnable.
if 'label' in df_meta.columns:
    df_meta = df_meta.drop(columns='label')
df_meta.insert(0, "label", label_serie)
df_meta.head()

In [None]:
# Calculate Geometric mean fluorescence ratio: every gMFI divided by the gMFI
# of the first 'HEK293_Unstained_control' row.
control_gmfi = df_meta.loc[df_meta.label == 'HEK293_Unstained_control', 'gMFI']
if control_gmfi.empty:
    # Fail with a readable message instead of a bare IndexError from .iloc[0]
    raise ValueError("No 'HEK293_Unstained_control' row in df_meta; cannot compute gMFI_ratio")
gMFI_control = control_gmfi.iloc[0]
df_meta = df_meta.assign(gMFI_ratio=df_meta.gMFI / gMFI_control)

# Add SampleID to merge with the flow data: 1..n following the current row order.
# NOTE(review): assumes the rows of df_meta are in the same order as the
# SampleID numbering of the flow file — confirm.
df_meta['SampleID'] = range(1, len(df_meta) + 1)
df_meta = df_meta[['label', 'SampleID', 'gMFI_ratio']]
df_meta.head()

## Flow data

In [None]:
# Flow cytometry data
flow_csv = file_directory + input_path + 'concatsimple_1.csv'
df = pandas.read_csv(flow_csv, sep=',')

# Show the table dimensions, then the table itself
print(df.shape, '\n')
df

In [None]:
# Sanity check before merging: print the number of distinct SampleID values in
# the flow data next to the number of distinct labels in the meta table.
# nunique() counts the IDs directly instead of deriving a count from the largest ID.
print(f'Amount of SampleID: {df.SampleID.nunique()}')
print(f'Amount of meta-info label: {len(df_meta.label.unique())}')

## Merge

In [None]:
# Merge: attach the meta columns to the flow rows that share a SampleID.
# The join key and join type are spelled out rather than left to the defaults
# (all shared column names, inner join), and validate= makes pandas raise if a
# SampleID occurs more than once in df_meta.
df = df_meta.merge(df, on='SampleID', how='inner', validate='one_to_many')

# Drop the columns that are not needed after the merge
df = df.drop(['SampleID', 'TIME', 'gMFI_ratio'], axis=1)
df

In [None]:
# Distinct labels present after the merge
df['label'].unique()

# Graphics

In [None]:
# Define a function to process data and calculate KDE
def process_and_plot(sample_data, color, label, legend_suffix=None):
    """Draw the peak-normalised KDE of log10(mCherry-A) with pyplot and return its mode.

    Parameters
    ----------
    sample_data : pandas.DataFrame
        Only the 'mCherry-A' column is read.
    color : str
        Colour used for the curve, the filled area and the vertical mode line.
    label : str
        Legend text. When it starts with '3', '; <legend_suffix>' is appended.
    legend_suffix : str, optional
        Suffix for labels starting with '3'. When omitted it is built from the
        notebook-level variables `MHI` and `condition`, which must then exist.

    Returns
    -------
    float or None
        x position (log10 units) of the KDE maximum, or None when fewer than
        two distinct finite log10 values are available; in that case a warning
        is emitted and nothing is drawn.
    """
    import warnings  # only needed on the degenerate-input path

    intensity = sample_data['mCherry-A'].to_numpy(dtype=float)
    # log10 is only defined for strictly positive values; selecting them first
    # avoids the -inf / NaN results (and RuntimeWarnings) log10 gives otherwise.
    data = numpy.log10(intensity[intensity > 0])
    data = data[numpy.isfinite(data)]  # no infinite or NaN values

    # A density cannot be estimated from fewer than two distinct points
    if data.size < 2 or data.min() == data.max():
        warnings.warn(f"process_and_plot: not enough usable 'mCherry-A' values for '{label}'; nothing plotted")
        return None

    # perform Kernel Density Estimation on a 1000-point grid over the data range
    kde = gaussian_kde(data)
    x = numpy.linspace(data.min(), data.max(), 1000)
    y = kde(x)

    # normalize to have y from 0 to 100 (peak = 100)
    peak_y = numpy.max(y)
    normalized_y = (y / peak_y) * 100

    # find Mode: grid position where the density is highest
    mode_index = numpy.argmax(y)
    mode_value = x[mode_index]

    # Legend text
    if label.startswith('3'):
        if legend_suffix is None:
            # Backward-compatible fallback: read the notebook-level loop variables
            legend_suffix = f'{MHI.upper()}-MHI si{condition.upper()}'
        legend_label = f'{label}; {legend_suffix}'
    else:
        legend_label = f'{label}'

    # Plot
    plt.plot(x, normalized_y, color=color, alpha=0.7, label=legend_label)
    plt.fill_between(x, normalized_y, color=color, alpha=0.3)  # fill the curve
    plt.axvline(mode_value, color=color, linestyle='--')  # add vertical line at the mode

    return mode_value  # the caller uses it to annotate the shift between curves

In [None]:
# Define parameters
unstained_control = 'HEK293_Unstained_control'
temperatures = [32, 37]
temperature_colors = ['blue', 'red']  # paired element-wise with `temperatures`
MHIs = ['rbm3', 'sp1', 'cirbp']
conditions = ['ctl', 'sumo1a', 'sumo1b', 'sumo2a', 'sumo2b']
save_figures = False  # set to True to also write every figure as a PNG

# The control rows are the same for every figure, so select them once
sample_control = df[df['label'] == unstained_control]

# Loop over MHIs and conditions
missing_samples_l = []

# NOTE: the loop variables must keep the names `MHI` and `condition`;
# process_and_plot reads them to build the legend text.
for MHI in MHIs:
    for condition in conditions:
        # Collect the samples of both temperatures first, so that no figure is
        # created for an incomplete combination and every missing label is recorded.
        samples = {}
        for temp in temperatures:
            label = f'{MHI}_{condition}_{temp}'
            sample = df[df['label'] == label]
            if sample.empty:
                missing_samples_l.append(label)
            else:
                samples[temp] = sample

        # if one sample is missing, skip this combination
        if len(samples) < len(temperatures):
            continue

        fig = plt.figure(figsize=(10, 6))

        # Plot the Unstained control
        process_and_plot(sample_control, 'gray', 'Unstained Control')

        # Plot each temperature and keep its mode to calculate % later
        modes = {}
        for temp, color in zip(temperatures, temperature_colors):
            modes[temp] = process_and_plot(samples[temp], color, f'{temp}°C')
        mode_32 = modes.get(32)
        mode_37 = modes.get(37)

        # Calculate and annotate percentage change if both modes are available
        # (and the reference mode is non-zero, to avoid a division by zero).
        # NOTE(review): the modes are log10 intensities, so this is the relative
        # shift on the log scale, not the percent change in fluorescence — confirm
        # that this is the intended quantity.
        if mode_32 is not None and mode_37 is not None and mode_37 != 0:
            percent_change = ((mode_32 - mode_37) / mode_37) * 100
            plt.text(mode_32 + 0.1, 100, f'{percent_change:+.2f}%', color='blue', fontsize=10)

        # Add labels and legend
        plt.xlabel('Log10(mCherry-A Intensity)')
        plt.ylabel('Normalized Density')
        plt.title(f'KDE Plot for {MHI.upper()}-MHI si{condition.upper()}')
        plt.legend(loc='upper left')

        # Save the graph (before plt.show()) when requested
        filename = f'{MHI}_{condition}_KDE_plot.png'
        if save_figures:
            # presumably the configured output folder is the intended destination — confirm
            plt.savefig(file_directory + output_path + filename)
        plt.show()
        plt.close(fig)

# print missing samples
missing_samples_l