In [63]:
import FlowCal as fc
import pandas as pd
import glob
import matplotlib.pyplot as plt
import seaborn as sns
import random
import numpy as np

In [64]:
# Collect every FCS file located exactly one directory level below ./raw_fcs.
# glob returns matches in arbitrary, filesystem-dependent order, so the list is
# sorted to keep everything derived from it (dict order, row order) reproducible.
dir_path = r'./raw_fcs/*/*.fcs'
file_names = sorted(glob.glob(dir_path))

In [65]:
# Read the marker/channel mapping sheet and keep only the channel names
# whose `use` flag equals 1; the resulting list is displayed as the cell output.
with open(r'./EU_marker_channel_mapping.xlsx', 'rb') as mapping_file:
    channel_map = pd.read_excel(mapping_file)
used_channels = channel_map.loc[channel_map['use'] == 1, 'PxN(channel)']
columns = used_channels.to_list()
columns

['SSC-W',
 'SSC-A',
 'SSC-H',
 'FSC-W',
 'FSC-A',
 'FSC-H',
 'FJComp-BUV737-A',
 'FJComp-APC-A',
 'FJComp-BV711-A',
 'FJComp-BB700-P-A',
 'FJComp-BB630-A',
 'FJComp-BUV395-A',
 'FJComp-BUV563-A',
 'FJComp-BV480-A',
 'FJComp-BV421-A',
 'FJComp-BV650-A',
 'FJComp-BYG584-A',
 'FJComp-PE-CF594-A',
 'FJComp-BUV615-P-A',
 'FJComp-BUV805-A',
 'FJComp-BYG790-A',
 'FJComp-PE-Cy5.5-A',
 'FJComp-BV570-A',
 'FJComp-BUV496-A',
 'FJComp-BV605-A',
 'FJComp-BV786-A',
 'FJComp-APC-R700-A',
 'FJComp-BYG670-A',
 'FJComp-BV750-P-A',
 'FJComp-FITC-A',
 'FJComp-APC-H7-A']

In [66]:
def plot_fig(s, idx):
    """Gate one sample on FSC-A / FSC-H and save its density scatter as a JPEG.

    Parameters
    ----------
    s
        Event data accepted by ``fc.gate.density2d``; it must expose the
        'FSC-A' and 'FSC-H' channels.
    idx
        Identifier used to build the output file name ``./img/<idx>.jpg``.

    Side effects
    ------------
    Switches the global matplotlib style to "ggplot", writes the image to disk
    (creating the target directory if needed) and closes the current figure.
    """
    import os  # local import: only needed to make sure the output directory exists

    # Activate the ggplot theme BEFORE anything is drawn. Style settings are
    # applied when figures/axes are created, so selecting the style after
    # plotting left the first saved figure styled differently from later ones.
    plt.style.use("ggplot")

    # Density gate on the FSC-A / FSC-H plane (gate_fraction=0.75), then draw
    # the gated events as a density-colored scatter plot.
    s = fc.gate.density2d(s, channels=['FSC-A', 'FSC-H'], gate_fraction=0.75)
    fc.plot.density2d(s, channels=['FSC-A', 'FSC-H'], mode='scatter')

    # Bold axis labels and a bold, larger title.
    plt.xlabel("FSC-A", fontweight="bold")
    plt.ylabel("FSC-H", fontweight="bold")
    plt.title("Scatter of fcs (FSC-A, FSC-H)",
              fontsize=15, fontweight="bold")

    # Save the figure. `idx` may contain path separators, so create whatever
    # directory the final path points into instead of assuming ./img exists.
    out_path = "./img/{}.jpg".format(idx)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path)
    plt.close()  # release the figure so repeated calls do not accumulate memory

In [67]:
# Load every FCS file into memory, keyed by a fixed slice of its path.
# The slice [10:40] skips the 10-character './raw_fcs/' prefix and keeps at most
# the next 30 characters; make_dataset later stores this key as 'file_flow_id'.
# NOTE(review): presumably this matches the ids used in the label sheet - confirm.
data_dict = {}
for file_name in file_names:
    data_dict[file_name[10:40]] = fc.io.FCSData(file_name)

# Two paths sharing the same 30-character slice would silently overwrite each
# other in the dict and drop a sample, so check that no key collided.
assert len(data_dict) == len(file_names), (
    "file_name[10:40] is not unique across the FCS files; "
    "some samples were overwritten in data_dict"
)

# To also export a gated FSC-A/FSC-H scatter per sample, call
# plot_fig(data_dict[key], key) for each key of data_dict.

In [68]:
def make_dataset(idx):
    """Summarize one loaded sample as a single row of per-channel statistics.

    Reads the module-level ``data_dict`` (loaded samples) and ``columns``
    (channel names to use).

    Parameters
    ----------
    idx
        Key of the sample in ``data_dict``; it is also stored in the returned
        row under 'file_flow_id'.

    Returns
    -------
    pandas.DataFrame
        One row (index label 'value') with a float64 column
        '<channel>_<stat>' for every channel in ``columns`` and every stat in
        mean, std, min, 25%, 50%, 75%, max (channel-major order), plus the
        'file_flow_id' column.
    """
    events = pd.DataFrame(data_dict[idx][:, columns], columns=columns).astype(np.float64)

    # describe() yields one row per statistic and one column per channel;
    # 'count' is dropped because it is not used as a feature.
    stats = events.describe().drop('count')

    # Flatten channel by channel. Building the row directly from the numeric
    # array keeps the features float64; the previous melt + transpose mixed the
    # string 'variable' column with the values and returned object-dtype columns.
    feature_names = [
        '{}_{}'.format(channel, stat) for channel in columns for stat in stats.index
    ]
    feature_values = stats.to_numpy().T.ravel()

    x = pd.DataFrame([feature_values], columns=feature_names, index=['value'])
    x['file_flow_id'] = idx

    return x
    
# Build one feature row per loaded sample and stack them into a single table.
feature_rows = [make_dataset(sample_key) for sample_key in data_dict]
data = pd.concat(feature_rows, ignore_index=True)

In [69]:
# Load the label sheet; it is joined to the features on 'file_flow_id' below.
with open(r'./EU_label.xlsx', 'rb') as label_file:
    label = pd.read_excel(io=label_file)

In [70]:
# Attach the label columns to every feature row. A left join keeps samples that
# have no entry in the label sheet (their label columns are then NaN).
# NOTE: this cell reassigns `data`; run it once per kernel session, because a
# second run would merge the label columns again onto the already merged frame.
data = pd.merge(data, label, on='file_flow_id', how='left')

In [71]:
# Encode the textual diagnosis as a boolean target: Healthy -> False, Sick -> True.
labelizer = {'Healthy': False, 'Sick': True}

# Fail with a clear message on any value the mapping does not know (typos, or
# already-encoded booleans when this cell is re-run) instead of an opaque KeyError.
unknown_labels = set(data['label'].dropna().unique()) - set(labelizer)
if unknown_labels:
    raise ValueError(
        "Unexpected values in 'label': {}".format(sorted(map(str, unknown_labels)))
    )

# Series.map leaves missing labels (samples without a match in the left merge)
# as NaN, where the previous per-element dict lookup raised KeyError on them.
data['label'] = data['label'].map(labelizer)

In [72]:
# Persist the labelled feature table next to the notebook, without the row index.
data.to_csv(path_or_buf='./data.csv', index=False)

In [76]:
# Display the names of all columns in the final table.
list(data.columns)

['SSC-W_mean',
 'SSC-W_std',
 'SSC-W_min',
 'SSC-W_25%',
 'SSC-W_50%',
 'SSC-W_75%',
 'SSC-W_max',
 'SSC-A_mean',
 'SSC-A_std',
 'SSC-A_min',
 'SSC-A_25%',
 'SSC-A_50%',
 'SSC-A_75%',
 'SSC-A_max',
 'SSC-H_mean',
 'SSC-H_std',
 'SSC-H_min',
 'SSC-H_25%',
 'SSC-H_50%',
 'SSC-H_75%',
 'SSC-H_max',
 'FSC-W_mean',
 'FSC-W_std',
 'FSC-W_min',
 'FSC-W_25%',
 'FSC-W_50%',
 'FSC-W_75%',
 'FSC-W_max',
 'FSC-A_mean',
 'FSC-A_std',
 'FSC-A_min',
 'FSC-A_25%',
 'FSC-A_50%',
 'FSC-A_75%',
 'FSC-A_max',
 'FSC-H_mean',
 'FSC-H_std',
 'FSC-H_min',
 'FSC-H_25%',
 'FSC-H_50%',
 'FSC-H_75%',
 'FSC-H_max',
 'FJComp-BUV737-A_mean',
 'FJComp-BUV737-A_std',
 'FJComp-BUV737-A_min',
 'FJComp-BUV737-A_25%',
 'FJComp-BUV737-A_50%',
 'FJComp-BUV737-A_75%',
 'FJComp-BUV737-A_max',
 'FJComp-APC-A_mean',
 'FJComp-APC-A_std',
 'FJComp-APC-A_min',
 'FJComp-APC-A_25%',
 'FJComp-APC-A_50%',
 'FJComp-APC-A_75%',
 'FJComp-APC-A_max',
 'FJComp-BV711-A_mean',
 'FJComp-BV711-A_std',
 'FJComp-BV711-A_min',
 'FJComp-BV711-A_25