# In [None]:
import os
import pandas as pd
import seaborn as sns
import glob

# Input/output names for images 1 through 8. The three lists are index-aligned:
# entry k of each list refers to the same image.
file_loc = [
    f'./image_{idx}_filtered_DataCalled.csv'
    for idx in range(1, 9)
]
file_loc_2 = [
    f'./image_{idx}_filtered_ClusterStats.csv'
    for idx in range(1, 9)
]
# Prefix used when naming the per-image output files.
result_loc = [
    f'image_{idx}'
    for idx in range(1, 9)
]


def filter_non_specific_binding(df, threshold_mean_lower, threshold_mean_upper, threshold_std):
    """
    Keeps only the clusters whose 'frame' values pass both the mean and the spread test.

    Rows are grouped by 'clusterID'. A cluster is retained when the mean of its
    'frame' values lies strictly between the two mean thresholds AND the
    standard deviation of its 'frame' values is strictly greater than
    threshold_std. All rows of a retained cluster are returned; all rows of a
    rejected cluster are dropped.

    Parameters:
    - df: DataFrame with at least the columns 'clusterID' and 'frame'.
    - threshold_mean_lower: Exclusive lower bound for the per-cluster mean frame.
    - threshold_mean_upper: Exclusive upper bound for the per-cluster mean frame.
    - threshold_std: Exclusive lower bound for the per-cluster frame standard deviation.

    Returns:
    - DataFrame containing the rows of every cluster that passed both tests.
    """
    def passes_both_tests(cluster_rows):
        frames = cluster_rows['frame']
        # Mean test first: a cluster failing it is rejected without
        # evaluating its spread.
        if not (threshold_mean_lower < frames.mean() < threshold_mean_upper):
            return False
        return frames.std() > threshold_std

    return df.groupby('clusterID').filter(passes_both_tests)


def filter_receptor_artificial_clusters(df_cluster_stats, df_filtered_clusters):
    """
    Splits the cluster statistics into two parts by membership in the filtered clusters.

    A row of df_cluster_stats goes to the first result when its 'ClusterID'
    value appears in the 'clusterID' column of df_filtered_clusters, and to the
    second result otherwise. Note the different capitalisation of the two
    column names.

    Parameters:
    - df_cluster_stats: DataFrame of cluster statistics with a 'ClusterID' column.
    - df_filtered_clusters: DataFrame of filtered clusters with a 'clusterID' column.

    Returns:
    - Tuple (df_receptor_clusters_final, df_artificial_clusters_final): the
      matching rows and the non-matching rows of df_cluster_stats.
    """
    kept_ids = list(df_filtered_clusters['clusterID'])
    # One membership mask serves both halves of the split.
    is_kept = df_cluster_stats['ClusterID'].isin(kept_ids)

    return df_cluster_stats[is_kept], df_cluster_stats[~is_kept]


def calculate_cluster_statistics(df):
    """
    Summarises a cluster table as a single-row DataFrame.

    Parameters:
    - df: DataFrame with the columns 'Points', 'Area (um2)' and 'Density (pts/um2)'.

    Returns:
    - One-row DataFrame with the columns 'Area', 'Density', 'Points' (the
      medians of the corresponding input columns) and 'Clusters' (the number
      of non-missing 'Area (um2)' entries).
    """
    median_points = df['Points'].median()
    area_column = df['Area (um2)']
    median_area = area_column.median()
    median_density = df['Density (pts/um2)'].median()
    # count() skips missing values, so this is the number of rows with an area.
    n_clusters = area_column.count()

    return pd.DataFrame({
        'Area': [median_area],
        'Density': [median_density],
        'Points': [median_points],
        'Clusters': [n_clusters],
    })


def loop_function(file_name, file_name_2, result_name, threshold_mean_lower, threshold_mean_upper, threshold_std):
    """
    Processes one pair of input files: filters the clusters, summarises them and saves the results.

    The data in file_name is filtered with filter_non_specific_binding; the
    cluster statistics in file_name_2 are then restricted to the clusters that
    survived, summarised with calculate_cluster_statistics, and both tables
    are written to disk.

    Parameters:
    - file_name: Path to the first CSV file containing data to be filtered.
    - file_name_2: Path to the second CSV file containing cluster statistics.
    - result_name: Prefix for the output files '<result_name>_stat.csv'
      (summary) and '<result_name>_result.csv' (retained cluster statistics).
    - threshold_mean_lower: Lower threshold for mean frame filtering.
    - threshold_mean_upper: Upper threshold for mean frame filtering.
    - threshold_std: Threshold for standard deviation frame filtering.

    Returns:
    - 0 (indicating successful completion).
    """
    data_frame = pd.read_csv(file_name, delimiter=',')
    cluster_stats_frame = pd.read_csv(file_name_2, delimiter=',')

    kept_rows = filter_non_specific_binding(
        data_frame,
        threshold_mean_lower,
        threshold_mean_upper,
        threshold_std,
    )

    # Only the retained half of the split is used; the rejected half is discarded.
    retained_stats, _rejected_stats = filter_receptor_artificial_clusters(
        cluster_stats_frame,
        kept_rows,
    )

    summary = calculate_cluster_statistics(retained_stats)

    # Write the one-row summary first, then the retained cluster statistics.
    summary.to_csv(f'{result_name}_stat.csv', index=False)
    retained_stats.to_csv(f'{result_name}_result.csv', index=False)

    return 0


if __name__ == "__main__":
    # Process every image with the same thresholds
    # (mean frame strictly between 1000 and 9000, frame std above 500).
    for file_name, file_name_2, result_name in zip(file_loc, file_loc_2, result_loc):
        loop_function(file_name, file_name_2, result_name, 1000, 9000, 500)

    # Save all stats processed in one csv file.
    # glob returns matches in arbitrary order; sorting makes the row order of
    # the combined file deterministic.
    csv_files = sorted(glob.glob('*_stat.csv'))
    stat_frames = [pd.read_csv(file) for file in csv_files]

    # DataFrame.append was removed in pandas 2.0, so the frames are combined
    # with pd.concat. concat raises on an empty list, hence the fallback to an
    # empty frame when no stat file was found.
    if stat_frames:
        df_append = pd.concat(stat_frames, ignore_index=True)
    else:
        df_append = pd.DataFrame()

    df_append.to_csv('allstats.csv')
