In [1]:
import os
from openpyxl import load_workbook
import pandas as pd
import numpy as np
from chord import Chord
import math

In [5]:
# basic functions

def data_deal(path, fileList, paraList, excelName, alignment):
    """Clean every workbook in ``fileList`` and write the results to one Excel file.

    For each file only the columns in ``paraList`` are kept, rows containing a
    non-positive or missing reading are discarded, and rows flagged by
    ``deal_outlier`` are removed. When ``alignment == 1`` every cleaned table
    is additionally passed through ``data_alignment`` together with the length
    of the shortest table. The result is handed to ``write_excel``.

    Parameters
    ----------
    path : str
        Prefix concatenated directly with each file name (no separator is
        inserted). The output location is derived by dropping the last two
        "/"-separated components of this string.
    fileList : list of str
        File names to read with ``pd.read_excel``.
    paraList : list of str
        Column names to keep from every file.
    excelName : str
        Name of the output workbook, appended directly to the derived output
        location.
    alignment : int
        ``1`` to align the tables with ``data_alignment``; any other value
        leaves them at their cleaned length.

    Returns
    -------
    None
    """
    data_final = {}
    for fileName in fileList:
        data_tmp = pd.read_excel(path + fileName)[paraList]
        # Readings that are <= 0 or missing are treated as invalid; any row
        # holding one is removed (NaN > 0 is False, so NaN is masked as well).
        data_tmp = data_tmp.where(data_tmp > 0).dropna(axis=0).reset_index(drop=True)
        data_tmp = deal_outlier(data_tmp).reset_index(drop=True)
        # The file name without its extension is used as the sheet name.
        data_final[os.path.splitext(fileName)[0]] = data_tmp

    if alignment == 1 and data_final:
        len_min = min(frame.shape[0] for frame in data_final.values())
        # NOTE(review): data_alignment only trims when its third argument is
        # one of the group names it knows; the sheet name is passed here, so
        # this assumes the files are named after their group - confirm.
        data_final = {
            sheet_name: data_alignment(frame, len_min, sheet_name).reset_index(drop=True)
            for sheet_name, frame in data_final.items()
        }

    write_excel("/".join(path.split("/")[:-2]), data_final, excelName, paraList)
    return


def deal_outlier(data_orig):
    """Return ``data_orig`` without the rows that hold an extreme outlier.

    For every column the quartiles Q1 and Q3 are computed; a value is an
    outlier when it lies below ``Q1 - 3*IQR`` or above ``Q3 + 3*IQR``
    (``IQR = Q3 - Q1``). A row is dropped as soon as any of its columns holds
    an outlier.

    Parameters
    ----------
    data_orig : pandas.DataFrame
        Table of numeric columns.

    Returns
    -------
    pandas.DataFrame
        The remaining rows, with their original index labels.
    """
    # Work on row positions rather than index labels so the function is
    # correct for any index, not only a fresh 0..n-1 range.
    keep = np.ones(len(data_orig), dtype=bool)
    for key in data_orig.columns:
        column = data_orig[key].to_numpy()
        Q1 = np.percentile(column, 25)
        Q3 = np.percentile(column, 75)
        outlier = 3 * (Q3 - Q1)
        keep &= ~((column < Q1 - outlier) | (column > Q3 + outlier))
    return data_orig.iloc[np.flatnonzero(keep)]
    


def data_alignment(df_T, lenth_min, group_name):
    """Trim ``df_T`` to ``lenth_min`` rows, chosen by the first column's values.

    Groups 'Control', 'T2DMInsulin2W' and 'T2DMLiraglutide2W' keep the rows
    with the largest values in the first column; groups 'T2DM',
    'T2DMInsulin1W' and 'T2DMLiraglutide1W' keep the rows with the smallest
    values. The kept rows stay in their original order. Any other
    ``group_name`` returns ``df_T`` unchanged.

    Parameters
    ----------
    df_T : pandas.DataFrame
        Table to trim.
    lenth_min : int
        Number of rows to keep; values larger than the table keep every row.
    group_name : str
        Group label that selects which end of the value range is kept.

    Returns
    -------
    pandas.DataFrame
    """
    keep_largest = ('Control', 'T2DMInsulin2W', 'T2DMLiraglutide2W')
    keep_smallest = ('T2DM', 'T2DMInsulin1W', 'T2DMLiraglutide1W')
    if group_name not in keep_largest and group_name not in keep_smallest:
        return df_T

    order = np.argsort(df_T[df_T.columns[0]].to_numpy())
    count = min(max(lenth_min, 0), len(order))
    if group_name in keep_largest:
        # An explicit start offset avoids the `[-0:]` pitfall, which would
        # select every row when zero rows are requested.
        chosen = order[len(order) - count:]
    else:
        chosen = order[:count]
    # Positions (not labels) are selected, and sorted to keep original order.
    return df_T.iloc[np.sort(chosen)]


def write_excel(path, data_dict, excelName, paraList):
    """Write the cleaned tables to the workbook ``path + excelName``.

    Two kinds of sheets are written:

    * one sheet per entry of ``data_dict`` (sheet name = key) holding that
      entry's full table;
    * one sheet per parameter in ``paraList`` holding that parameter's column
      from every entry side by side (column name = key). Characters "[", "]"
      and "/" in the parameter name are replaced by "(", ")" and "|" to form
      the sheet name.

    An existing workbook is appended to, with same-named sheets replaced; a
    missing workbook is created.

    Parameters
    ----------
    path : str
        Prefix concatenated directly with ``excelName``.
    data_dict : dict of str -> pandas.DataFrame
        Tables to store; every table must contain the columns in ``paraList``.
    excelName : str
        Workbook file name appended to ``path``.
    paraList : list of str
        Parameters (column names) that get their own cross-file sheet.
    """
    target = path + excelName
    # `if_sheet_exists` is only accepted in append mode, and append mode needs
    # an existing file, so pick the writer options from the file's presence.
    if os.path.exists(target):
        writer_options = {"mode": "a", "if_sheet_exists": "replace"}
    else:
        writer_options = {"mode": "w"}

    # The context manager saves once and closes the file even on error.
    with pd.ExcelWriter(target, engine='openpyxl', **writer_options) as writer:
        for key, value in data_dict.items():
            value.to_excel(writer, sheet_name=key, index=False)
        for para in paraList:
            per_file = {key: value[para] for key, value in data_dict.items()}
            sheet = para.replace("[", "(").replace("]", ")").replace("/", "|")
            pd.DataFrame(per_file).to_excel(writer, sheet_name=sheet, index=False)


def lcm(x, y):
    """Return the least common multiple of the integers ``x`` and ``y``.

    The result is non-negative; it is 0 when either argument is 0.
    """
    if x == 0 or y == 0:
        return 0
    # lcm(x, y) * gcd(x, y) == |x * y|
    return abs(x * y) // math.gcd(x, y)

In [None]:
# Processing data for drawing pie charts

path = "Your Path"
# os.listdir returns entries in arbitrary order, while later cells pick files
# by position; sorting makes that order reproducible. Entries that are not
# spreadsheets (sub-folders, Excel "~$" lock files, ...) are skipped because
# every name in fileList is passed to pd.read_excel.
fileList = sorted(
    name for name in os.listdir(path)
    if name.lower().endswith((".xlsx", ".xlsm", ".xlsb", ".xls", ".ods"))
    and not name.startswith("~$")
)

list_pie = ["Oxygenized hemoglobin concentration [µM]", "Reduced hemoglobin concentration [µM]"]    
data_deal(path, fileList, list_pie, "/dataForPie.xlsx", alignment = 1)

In [None]:
# Clean all measured parameters and store them for the later cells
# (no length alignment is applied here).

para_list = [
    'RBC tissue fraction [%]',
    'Total hemoglobin concentration [µM]',
    'Oxygenized hemoglobin concentration [µM]',
    'Reduced hemoglobin concentration [µM]',
    'Oxygen saturation [%]',
    'Speed resolved perfusion [% RBC x mm/s], < 1 mm/s',
    '1-10 mm/s',
    '> 10 mm/s',
    'Total perf',
    'Conventional perfusion [PU]',
]
data_deal(path, fileList, para_list, "/dataProcessed.xlsx", alignment=0)

In [None]:
# Processing data for drawing bubble charts

def data_alignment2(dict_T, lenth_min):
    """Trim every known group's Series in ``dict_T`` to ``lenth_min`` values.

    Groups 'Control', 'T2DMInsulin2W' and 'T2DMLiraglutide2W' keep their
    largest values; groups 'T2DM', 'T2DMInsulin1W' and 'T2DMLiraglutide1W'
    keep their smallest values. Kept values stay in their original order and
    are re-indexed from 0. Entries under any other key are left untouched.

    Parameters
    ----------
    dict_T : dict of str -> pandas.Series
        Values per group; the dict is updated in place.
    lenth_min : int
        Number of values to keep per group; larger than a Series keeps it all.

    Returns
    -------
    dict
        The same ``dict_T`` object.
    """
    keep_largest = {'Control', 'T2DMInsulin2W', 'T2DMLiraglutide2W'}
    keep_smallest = {'T2DM', 'T2DMInsulin1W', 'T2DMLiraglutide1W'}
    for key in list(dict_T):
        if key not in keep_largest and key not in keep_smallest:
            continue
        series = dict_T[key]
        order = np.argsort(series.to_numpy())
        count = min(max(lenth_min, 0), len(order))
        if key in keep_largest:
            # An explicit start offset avoids `[-0:]`, which would keep all.
            chosen = order[len(order) - count:]
        else:
            chosen = order[:count]
        # Select by position and restore the original ordering.
        dict_T[key] = series.iloc[np.sort(chosen)].reset_index(drop=True)
    return dict_T

group_list = ['Control', 'T2DM', 'T2DMInsulin1W', 'T2DMInsulin2W', 'T2DMLiraglutide1W', 'T2DMLiraglutide2W']

# Placeholder: replace with the integer number of sample columns that each
# group occupies in a parameter sheet of dataProcessed.xlsx.
list_num = "Number of samples per group"

bubble_file = path + '/dataForBubble.xlsx'
# Append to an existing workbook (replacing same-named sheets) or create it;
# `if_sheet_exists` is only accepted in append mode.
if os.path.exists(bubble_file):
    writer_options = {"mode": "a", "if_sheet_exists": "replace"}
else:
    writer_options = {"mode": "w"}

# The context manager saves once and closes the file even if a sheet fails.
with pd.ExcelWriter(bubble_file, engine='openpyxl', **writer_options) as writer:
    for para in para_list:
        # Parameter sheets are stored under this sanitised name by
        # write_excel ("[", "]" and "/" are not valid in sheet titles).
        sheet = para.replace("[", "(").replace("]", ")").replace("/", "|")
        # Read the parameter sheet once and slice each group's columns from it.
        data_sheet = pd.read_excel(path + "/dataProcessed.xlsx", sheet_name=sheet)

        data_dict = {}
        for index, group in enumerate(group_list):
            data_tmp = data_sheet.iloc[:, index*list_num:(index+1)*list_num]
            # Pool the group's samples column by column, skipping the NaN
            # padding of the shorter columns.
            list_tmp = []
            for key in data_tmp.columns:
                list_tmp.extend(data_tmp[key].dropna().tolist())
            data_dict[group] = pd.Series(list_tmp)

        # Every group is trimmed to the size of the smallest group.
        len_min = min(len(series) for series in data_dict.values())
        data_dict = data_alignment2(data_dict, len_min)
        pd.DataFrame(data_dict).to_excel(writer, sheet_name=sheet, index=False)
        print(f"{para} down")

            

In [None]:
# Processing data for drawing heat map

# Placeholder: replace with the integer number of sample files per group.
list_num = "Number of samples per group"

# Files are assigned to groups by position (the first list_num files belong to
# group_list[0], and so on), so a deterministic order is required.
# NOTE(review): assumes alphabetical file order matches group_list - confirm.
ordered_files = sorted(fileList)

heatmap_file = path + '/dataForHeatmap.xlsx'
# Append to an existing workbook (replacing same-named sheets) or create it;
# `if_sheet_exists` is only accepted in append mode.
if os.path.exists(heatmap_file):
    writer_options = {"mode": "a", "if_sheet_exists": "replace"}
else:
    writer_options = {"mode": "w"}

# The context manager saves once and closes the file even if a sheet fails.
with pd.ExcelWriter(heatmap_file, engine='openpyxl', **writer_options) as writer:
    for index, group in enumerate(group_list):
        data_dict = {para: [] for para in para_list}

        for i in range(list_num):
            # Per-file sheets are named after the file without its extension;
            # a plain string is needed here (a list would make read_excel
            # return a dict of sheets instead of one table).
            sheet_name = os.path.splitext(ordered_files[index*list_num + i])[0]
            data_tmp = pd.read_excel(path + "/dataProcessed.xlsx", sheet_name=sheet_name)
            for para in data_tmp.columns:
                data_dict[para].extend(data_tmp[para].values.tolist())

        pd.DataFrame(data_dict).to_excel(writer, sheet_name=group, index=False)
        print(f"{group} down")

In [None]:
# Function for drawing chord diagram

# Credentials for the chord package. They are read from the environment
# variables CHORD_USER / CHORD_KEY when set, so real values do not have to be
# stored in the notebook; the original placeholders remain as the fallback.
Chord.user = os.environ.get("CHORD_USER", "your username")
Chord.key = os.environ.get("CHORD_KEY", "your key")

def draw_chord(chord_data, names, colors, group, part_num, path):
    """Build one chord diagram, save it as HTML and display it.

    Parameters
    ----------
    chord_data : list of list
        Square matrix passed to ``Chord`` as the connection data.
    names : list of str
        Labels passed to ``Chord``, one per matrix row.
    colors : list of str
        Colour list passed to ``Chord``.
    group : str
        Used as the diagram title and as the output file name.
    part_num : int
        Passed as ``divide_idx``, the index at which the diagram is divided
        into its left ("SO2(%)") and right ("BP(PU)") parts.
    path : str
        Base directory; the file is written to ``path + '/chord/' + group +
        '.html'``.
    """
    chord = Chord(chord_data,
                  names,
                  colors=colors,
                  opacity=0.8, 
                  padding=0.01, 
                  width=2500, 
                  label_color="#454545", 
                  curved_labels = True, 
                  wrap_labels=False, 
                  margin=70, 
                  reverse_gradients=False,
                  credit=False,
                  font_size="36px",
                  font_size_large="36px",
                  details=[],
                  details_thumbs=[], 
                  thumbs_width=85,
                  thumbs_margin=5,
                  thumbs_font_size=14,
                  popup_width=350,
                  details_separator=", ",
                  divide=True,
                  divide_idx=part_num,
                  divide_size=0.5, 
                  divide_left_label="SO2(%)",
                  divide_right_label="BP(PU)",
                  instances=0,
                  conjunction="and",
                  verb="occur together in",
                  noun="instances", 
                  symmetric=True,
                  title=group,
                  arc_numbers=False, 
                  inner_radius_scale=0.39,
                  outer_radius_scale=1.1, 
                  allow_download=True,        
                 )
    # Make sure the output folder exists so saving does not depend on it
    # having been created by hand beforehand.
    out_dir = path + '/chord/'
    os.makedirs(out_dir, exist_ok=True)
    chord.to_html(out_dir + group + '.html')
    chord.show()

In [None]:
# Function for drawing chord diagram

def dealAndchord(data_select, path, filename, path_to, samples_per_group=9):
    """Bin two parameters per group and draw one chord diagram per group.

    For every group the two parameter sheets named by ``data_select`` are read
    from ``path + filename``, each value is assigned to a bin, and the number
    of value pairs falling into each (first-parameter bin, second-parameter
    bin) combination is counted. The counts are arranged into a symmetric
    block matrix and passed to ``draw_chord``. Finally a 'demo' diagram with a
    constant matrix is drawn.

    Parameters
    ----------
    data_select : dict of str -> [lower, upper, bins]
        Exactly two entries are used, in insertion order: sheet name mapped to
        the lower bound, upper bound and number of bins of that parameter.
    path : str
        Prefix concatenated directly with ``filename``.
    filename : str
        Workbook name appended to ``path``.
    path_to : str
        Output base directory handed to ``draw_chord``.
    samples_per_group : int, optional
        Number of consecutive columns each group occupies in a parameter
        sheet (default 9).
    """
    groups = ['Control', 'T2DM', 'Ins 1w', 'Ins 2w', 'Lirag 1w', 'Lirag 2w']
    colors = ['#e41a1c', '#4F3F84', "#7400B8", '#984ea3',"#5E60CE", "#5684D6", "#56CFE1", "#64DFDF", "#80FFDB","#b7f8db" ]
    # The first parameter uses the palette back to front, the second one
    # uses it front to back.
    palette_first = colors[::-1]
    palette_second = colors

    # Bin counts per parameter do not depend on the group; compute them once.
    keys = list(data_select.keys())
    matrx_shape = [data_select[key][2] for key in keys]
    n_first, n_second = matrx_shape[0], matrx_shape[1]

    for index, value in enumerate(groups):
        colors_final = []
        print(value)
        names = []
        matrx_count = np.zeros((n_second, n_first), dtype=int)
        data_o = []
        for position, key in enumerate(keys):
            lower, upper = data_select[key][0], data_select[key][1]
            usecols = list(range(index*samples_per_group, (index+1)*samples_per_group))
            data_tmp = pd.read_excel(path + filename, sheet_name=key, usecols=usecols)
            divider = (upper - lower) / data_select[key][2]
            if position == 0:
                # Labels of the first parameter run upwards from the lower bound.
                start = lower
                while start < upper:
                    names.append(str(start) + '-' + str(start+divider))
                    start += divider
                    colors_final += palette_first
            else:
                # Labels of the second parameter run downwards from the upper bound.
                start = upper
                while start > lower:
                    names.append(str(start) + '-' + str(start-divider))
                    start -= divider
                    colors_final += palette_second
            values = data_tmp.values
            # 1-based bin number of every non-missing value.
            data_o.append([math.ceil((a-lower)/divider) for a in values[~np.isnan(values)]])
        for x, y in zip(data_o[0], data_o[1]):
            # Clamp to the valid bin range on both sides: values above the
            # upper bound go to the last bin, values at or below the lower
            # bound go to the first bin (bin 0 would index out of range or
            # wrap around to the last row).
            x = min(max(x, 1), n_first)
            y = min(max(y, 1), n_second)
            matrx_count[y-1][n_first-x] += 1
        # Symmetric block matrix: zero blocks on the diagonal (no links inside
        # one parameter), the counts and their transpose off the diagonal.
        chord_data = np.block([
            [np.zeros((n_first, n_first), dtype=int), matrx_count.T],
            [matrx_count, np.zeros((n_second, n_second), dtype=int)],
        ]).tolist()
        draw_chord(chord_data, names, colors_final, value, n_first, path_to)
    # Constant matrix used to render a reference ('demo') diagram with the
    # labels and colours of the last group.
    data_demo = np.full((n_first+n_second, n_first+n_second), 10).tolist()
    print(colors_final)
    draw_chord(data_demo, names, colors_final, 'demo', n_first, path_to)


In [None]:
# Per parameter sheet: [lower bound, upper bound, number of bins].
data_select = {"Conventional perfusion (PU)":[0,1000,50], "Oxygen saturation (%)":[0,100,50]}
# dealAndchord builds the workbook location as `path + filename`, so the name
# must be given relative to `path` (prefixing it here would duplicate `path`).
filename = "/dataProcessed.xlsx"
dealAndchord(data_select, path, filename, path)

In [None]:
# Comparison of four dimensionless methods
# And processing data for drawing 3D images
        
for data_group in group_list:
    data_orig = pd.read_excel(path+"/dataProcessed.xlsx", header=0, sheet_name=data_group)
    features = data_orig.columns.values.tolist()
    column_num = data_orig.shape[1]
    dict_tmp = {}
    for key in features:
        dict_tmp[key] = [x for x in data_orig[key] if ~np.isnan(x)]

    data_lcm = 1
    for key in features:
        data_lcm = lcm(len(dict_tmp[key]), data_lcm)

    dict_new = {}
    for key in features:
        tmp = []
        for i in range(len(dict_tmp[key])):
            for k in range(int(data_lcm/len(dict_tmp[key]))):
                tmp.append(dict_tmp[key][i])
        dict_new[key] = tmp



    # Min-Max
    data_total = []
    for i in range(data_lcm):
        for index, value in enumerate(features):
            data_total.append([(i+1)*60/data_lcm, index, (dict_new[value][i]-min(dict_new[value]))/(max(dict_new[value])-min(dict_new[value]))])
    
    # Z-score
    data_total2 = []
    max_num = 0
    min_num = 1
    for i in range(data_lcm):
        for index, value in enumerate(features):
            if (max(dict_new[value])-np.mean(dict_new[value]))/np.std(dict_new[value]) > max_num:
                max_num = (max(dict_new[value])-np.mean(dict_new[value]))/np.std(dict_new[value])
            if (min(dict_new[value])-np.mean(dict_new[value]))/np.std(dict_new[value]) < min_num:
                min_num = (min(dict_new[value])-np.mean(dict_new[value]))/np.std(dict_new[value])
            data_total2.append([(i+1)*60/data_lcm, index, (dict_new[value][i]-np.mean(dict_new[value]))/np.std(dict_new[value])])

    # Median
    data_total3 = []
    max_num = 0
    for i in range(data_lcm):
        for index, value in enumerate(features):
            if dict_new[value][i]/np.median(dict_new[value]) > max_num:
                max_num = dict_new[value][i]/np.median(dict_new[value])
            data_total3.append([(i+1)*60/data_lcm, index, dict_new[value][i]/np.median(dict_new[value])])

    # Norm
    data_total4 = []
    max_num = 0
    for i in range(data_lcm):
        for index, value in enumerate(features):
            if dict_new[value][i]/np.linalg.norm(dict_new[value]) > max_num:
                max_num = dict_new[value][i]/np.linalg.norm(dict_new[value])
            data_total4.append([(i+1)*60/data_lcm, index, dict_new[value][i]/np.linalg.norm(dict_new[value])])