# Combination of automatic and manual measurements

In [1]:
import pandas as pd
import os
import numpy as np

In [2]:
# Gene names processed by this notebook. Each name is used both as a
# sub-directory and as a file-name prefix when the per-gene CSV files are
# read and written (os.path.join(gene, gene + '_...')).
# NOTE(review): presumably one knockout mouse line per gene — confirm.
genes = [
    'Acnat2', 'Acsf3', 'Anpep', 'Apoa4', 'Arvcf',
    'Bsph2',
    'Cenpv', 'Cisd1', 'Cmas', 'Cnot6l', 'Crygn',
    'Dmd', 'Dnajb14',
    'Echs1', 'Ergic2', 
    'Fabp2', 'Foxd2', 
    'Gatb', 'Gins2', 'Gstm1', 'Gstp1',  
    'Ibtk',
    'Kansl1l', 'Klotho',  
    'Lmo1',
    'Mogat1',
    'Pcdhgb2',
    'Rtn4', 
    'Slc5a9', 'Slc6a15', 'Strbp', 'Sytl4', 
    'Tanc2', 'Tnks', 
    'Uggt2', 
    'Zfp280d'
]
# Echo the list together with its length as a quick sanity check.
print(genes, len(genes))

['Acnat2', 'Acsf3', 'Anpep', 'Apoa4', 'Arvcf', 'Bsph2', 'Cenpv', 'Cisd1', 'Cmas', 'Cnot6l', 'Crygn', 'Dmd', 'Dnajb14', 'Echs1', 'Ergic2', 'Fabp2', 'Foxd2', 'Gatb', 'Gins2', 'Gstm1', 'Gstp1', 'Ibtk', 'Kansl1l', 'Klotho', 'Lmo1', 'Mogat1', 'Pcdhgb2', 'Rtn4', 'Slc5a9', 'Slc6a15', 'Strbp', 'Sytl4', 'Tanc2', 'Tnks', 'Uggt2', 'Zfp280d'] 36


## Filtering

In [3]:
def filter(gene):
    """Filter the automatic measurements of one gene and save the result.

    Reads ``<gene>/<gene>_output_labeled.csv``, keeps only rows that pass the
    quality and plausibility checks below, and writes the remaining rows to
    ``<gene>/<gene>_data_filtered.csv`` (without the index).

    Three row counts are printed: after loading, after the quality filter,
    and after the logical filter.

    Note: this function shadows Python's builtin ``filter`` within the notebook.

    Parameters
    ----------
    gene : str
        Gene name; used as directory name and as file-name prefix.
    """
    source_csv = os.path.join(gene, gene + '_output_labeled.csv')
    target_csv = os.path.join(gene, gene + '_data_filtered.csv')

    measurements = pd.read_csv(source_csv)
    print(len(measurements))

    # quality filter: both measurements must be flagged 'high' and be non-zero
    both_flagged_high = (
        measurements['LVIDd_auto_quality_flag'].eq('high')
        & measurements['LVIDs_auto_quality_flag'].eq('high')
    )
    both_non_zero = measurements['LVIDs_auto'].ne(0) & measurements['LVIDd_auto'].ne(0)
    measurements = measurements.loc[both_flagged_high & both_non_zero]
    print(len(measurements))

    # logical filter: LVIDd_auto has to be strictly larger than LVIDs_auto
    d_exceeds_s = measurements['LVIDd_auto'].gt(measurements['LVIDs_auto'])
    measurements = measurements.loc[d_exceeds_s]
    print(len(measurements))

    measurements.to_csv(target_csv, index=False)

In [4]:
# Run the filtering step for every gene. For each gene its name is printed,
# followed by the three row counts that filter() prints (loaded rows, rows
# after the quality filter, rows after the logical filter).
for gene in genes:
    print(gene)
    filter(gene)

Acnat2
4904
2913
2910
Acsf3
475
197
196
Anpep
756
169
169
Apoa4
223
74
74
Arvcf
4205
1782
1779
Bsph2
2160
1261
1261
Cenpv
1221
670
670
Cisd1
4073
1661
1658
Cmas
6280
3116
3112
Cnot6l
10428
5414
5403
Crygn
995
325
325
Dmd
3178
1197
1194
Dnajb14
6315
2696
2680
Echs1
7319
2987
2987
Ergic2
4916
1947
1927
Fabp2
4881
2014
2005
Foxd2
296
191
191
Gatb
5413
2990
2982
Gins2
1306
133
133
Gstm1
4512
2684
2672
Gstp1
1647
755
754
Ibtk
485
154
153
Kansl1l
12935
6028
6022
Klotho
633
349
348
Lmo1
833
419
418
Mogat1
1242
395
394
Pcdhgb2
1042
412
412
Rtn4
446
232
232
Slc5a9
1310
765
765
Slc6a15
5614
2905
2896
Strbp
528
278
278
Sytl4
6181
3084
3065
Tanc2
1084
379
378
Tnks
1704
753
753
Uggt2
6985
3554
3542
Zfp280d
7443
2256
2247


In [5]:
# Apply the same quality and logical filters to the pooled file that holds
# the measurements of all lines, printing the row count after every stage.
filtered_data_all = pd.read_csv('output_labeled_all_lines.csv')
print(len(filtered_data_all))

# quality_filter: both quality flags must be 'high' and both values non-zero
filtered_data_all = filtered_data_all.loc[
    filtered_data_all['LVIDd_auto_quality_flag'].eq('high')
    & filtered_data_all['LVIDs_auto_quality_flag'].eq('high')
    & filtered_data_all['LVIDs_auto'].ne(0)
    & filtered_data_all['LVIDd_auto'].ne(0)
]
print(len(filtered_data_all))

# logical_filter: LVIDd_auto has to be strictly larger than LVIDs_auto
filtered_data_all = filtered_data_all.loc[
    filtered_data_all['LVIDd_auto'].gt(filtered_data_all['LVIDs_auto'])
]
print(len(filtered_data_all))

filtered_data_all.to_csv('data_filtered_all_lines.csv', index=False)
filtered_data_all

123968
57139
56985


Unnamed: 0,mouse_id,file,LVIDd_auto,LVIDd_auto_time,LVIDs_auto,LVIDs_auto_time,heart_rate_auto,heart_rate_auto_time,LVIDd_auto_quality_label,LVIDd_auto_quality_flag,LVIDs_auto_quality_label,LVIDs_auto_quality_flag
0,30514245,Acnat2_1vX.dcm,2.724419,0.057500,0.846221,0.015000,750.000000,0.057500,0.999722,high,0.999722,high
1,30514245,Acnat2_1vX.dcm,2.641860,0.137500,0.990698,0.100000,692.307692,0.137500,0.999722,high,0.999722,high
2,30514245,Acnat2_1vX.dcm,2.621221,0.224167,0.846221,0.178333,765.957447,0.224167,0.999945,high,0.999722,high
3,30514245,Acnat2_1vX.dcm,2.683140,0.302500,1.114535,0.261667,720.000000,0.302500,0.999945,high,0.999945,high
4,30514245,Acnat2_1vX.dcm,2.600581,0.385833,0.970058,0.344167,692.307692,0.385833,0.999945,high,0.999945,high
...,...,...,...,...,...,...,...,...,...,...,...,...
123916,30520010,Zfp280d_2v2_108.dcm,2.683140,4.685833,1.279651,4.394167,705.882353,4.685833,0.902861,high,0.998531,high
123917,30520010,Zfp280d_2v2_108.dcm,3.034012,4.770833,1.300291,4.475833,750.000000,4.770833,0.902861,high,0.998531,high
123951,30520106,Zfp280d_2v2_48.dcm,2.579942,3.185833,1.444767,3.412500,699.029126,3.185833,0.571836,high,0.935106,high
123952,30520106,Zfp280d_2v2_48.dcm,2.868895,3.271667,1.031977,3.501667,590.163934,3.271667,0.571836,high,0.935106,high


## Calculation of additional parameters (volumes, EF, FS)

In [6]:
def calculate_additional_parameter(gene):
    """Add volume, EF and FS columns to the filtered data of one gene.

    Reads ``<gene>/<gene>_data_filtered.csv``, appends the columns
    ``LV_Vold_auto``, ``LV_Vols_auto``, ``EF_auto`` and ``FS_auto`` (in that
    order) and writes the result to
    ``<gene>/<gene>_data_filtered_plus_functional_parameter.csv`` without the
    index.

    Parameters
    ----------
    gene : str
        Gene name; used as directory name and as file-name prefix.
    """
    def _volume_from_diameter(diameter):
        # V = 7.0 / (2.4 + D) * D**3
        # NOTE(review): this looks like the Teichholz formula — confirm.
        return (7.0 / (2.4 + diameter)) * np.power(diameter, 3)

    def _percent_decrease(first, second):
        # Decrease from `first` to `second`, as a percentage of `first`.
        return 100 * ((first - second) / first)

    source_csv = os.path.join(gene, gene + '_data_filtered.csv')
    target_csv = os.path.join(gene, gene + '_data_filtered_plus_functional_parameter.csv')

    stats = pd.read_csv(source_csv)
    lvid_d = stats['LVIDd_auto']
    lvid_s = stats['LVIDs_auto']

    stats['LV_Vold_auto'] = _volume_from_diameter(lvid_d)
    stats['LV_Vols_auto'] = _volume_from_diameter(lvid_s)
    stats['EF_auto'] = _percent_decrease(stats['LV_Vold_auto'], stats['LV_Vols_auto'])
    stats['FS_auto'] = _percent_decrease(lvid_d, lvid_s)

    stats.to_csv(target_csv, index=False)

In [7]:
# Derive the volume, EF and FS columns for every gene. The gene name is
# printed as a progress marker; the function itself prints nothing.
for gene in genes:
    print(gene)
    calculate_additional_parameter(gene)

Acnat2
Acsf3
Anpep
Apoa4
Arvcf
Bsph2
Cenpv
Cisd1
Cmas
Cnot6l
Crygn
Dmd
Dnajb14
Echs1
Ergic2
Fabp2
Foxd2
Gatb
Gins2
Gstm1
Gstp1
Ibtk
Kansl1l
Klotho
Lmo1
Mogat1
Pcdhgb2
Rtn4
Slc5a9
Slc6a15
Strbp
Sytl4
Tanc2
Tnks
Uggt2
Zfp280d


In [8]:
# Derive the volume, EF and FS columns for the pooled, filtered file of all
# lines. The columns are appended in the order given; the EF lambda reads the
# two volume columns created just before it.
stats = pd.read_csv('data_filtered_all_lines.csv').assign(
    LV_Vold_auto=lambda frame: (7.0 / (2.4 + frame['LVIDd_auto'])) * np.power(frame['LVIDd_auto'], 3),
    LV_Vols_auto=lambda frame: (7.0 / (2.4 + frame['LVIDs_auto'])) * np.power(frame['LVIDs_auto'], 3),
    EF_auto=lambda frame: 100 * ((frame['LV_Vold_auto'] - frame['LV_Vols_auto']) / frame['LV_Vold_auto']),
    FS_auto=lambda frame: 100 * ((frame['LVIDd_auto'] - frame['LVIDs_auto']) / frame['LVIDd_auto']),
)
stats.to_csv('data_filtered_plus_functional_parameter_all_lines.csv', index=False)

## Combination of manual and automatic measurements

In [9]:
def aggregate_manual_to_automatic_measurements(gene):
    """Attach the manual measurements of one gene to the automatic ones.

    Reads the pooled automatic measurements from
    ``data_filtered_plus_functional_parameter_all_lines.csv`` and the manual
    annotations from ``<gene>/<gene>_manual_annotation.csv``. Automatic rows
    are restricted to the mouse ids that occur in the manual file, then the
    manual columns are joined onto them via ``mouse_id`` (left join). Finally
    the difference/mean columns are added and the table is written to
    ``<gene>/<gene>_data_filtered_plus_functional_parameter_plus_manual_measurements.csv``.

    The manual file must already provide the columns ``mouse_id``,
    ``LVIDd_manual``, ``LVIDs_manual``, ``EF_manual`` and ``FS_manual``.
    (A retired preparation step used to rename ``LVIDd``, ``LVIDs``,
    ``EJ_fraction`` and ``Fract_shortening`` to these names.)

    Prints three counts of unique mouse ids: in the manual file, in the
    restricted automatic data, and in the merged table.

    Parameters
    ----------
    gene : str
        Gene name; used as directory name and as file-name prefix.
    """
    auto = pd.read_csv('data_filtered_plus_functional_parameter_all_lines.csv')
    manual_path = os.path.join(gene, (gene + '_manual_annotation.csv'))
    manual = pd.read_csv(manual_path)

    # keep only automatic measurements of mice that were also annotated manually
    analysed_mice = np.unique(manual['mouse_id'])
    auto = auto[auto['mouse_id'].isin(analysed_mice)]

    # merge and prepare data
    combi = pd.merge(auto, manual, how='left', on='mouse_id')
    combi = combi.reset_index(drop=True)
    print(len(analysed_mice), len(np.unique(auto['mouse_id'])), len(np.unique(combi['mouse_id'])))

    # differences (automatic minus manual) and means of both methods,
    # computed column-wise instead of row by row
    combi['dif_LVIDd'] = combi['LVIDd_auto'] - combi['LVIDd_manual']
    combi['mean_LVIDd'] = (combi['LVIDd_auto'] + combi['LVIDd_manual']) / 2
    combi['dif_LVIDs'] = combi['LVIDs_auto'] - combi['LVIDs_manual']
    combi['mean_LVIDs'] = (combi['LVIDs_auto'] + combi['LVIDs_manual']) / 2
    combi['dif_EF'] = combi['EF_auto'] - combi['EF_manual']
    combi['dif_FS'] = combi['FS_auto'] - combi['FS_manual']

    # save file
    saving_path = os.path.join(gene, (gene + '_data_filtered_plus_functional_parameter_plus_manual_measurements.csv'))
    combi.to_csv(saving_path, index=False)

In [10]:
# Combine manual and automatic measurements for every gene. After each gene
# name the function prints three unique-mouse counts (manual file, matching
# automatic data, merged table); differing counts mean that some manually
# annotated mice have no automatic measurement left in the pooled file.
for gene in genes:
    print(gene)
    aggregate_manual_to_automatic_measurements(gene)

Acnat2
32 32 32
Acsf3
3 3 3
Anpep
4 4 4
Apoa4
1 1 1
Arvcf
23 23 23
Bsph2
6 6 6
Cenpv
3 3 3
Cisd1
73 73 73
Cmas
39 39 39
Cnot6l
38 38 38
Crygn
5 5 5
Dmd
38 38 38
Dnajb14
37 36 36
Echs1
32 32 32
Ergic2
32 32 32
Fabp2
46 46 46
Foxd2
2 2 2
Gatb
38 38 38
Gins2
3 3 3
Gstm1
29 29 29
Gstp1
7 7 7
Ibtk
3 3 3
Kansl1l
36 36 36
Klotho
4 4 4
Lmo1
4 4 4
Mogat1
5 5 5
Pcdhgb2
4 4 4
Rtn4
2 2 2
Slc5a9
6 6 6
Slc6a15
50 50 50
Strbp
3 3 3
Sytl4
37 37 37
Tanc2
5 5 5
Tnks
8 8 8
Uggt2
64 64 64
Zfp280d
50 48 48


In [11]:
# Paths of the per-gene tables that combine automatic and manual measurements,
# in the order of `genes`. Built as a plain Python list: the previous
# `filenames is None` branch could never run (the variable started as []),
# and np.append re-allocated the collection on every iteration.
filenames = [
    os.path.join(gene, gene + '_data_filtered_plus_functional_parameter_plus_manual_measurements.csv')
    for gene in genes
]
print(filenames)

['Acnat2/Acnat2_data_filtered_plus_functional_parameter_plus_manual_measurements.csv'
 'Acsf3/Acsf3_data_filtered_plus_functional_parameter_plus_manual_measurements.csv'
 'Anpep/Anpep_data_filtered_plus_functional_parameter_plus_manual_measurements.csv'
 'Apoa4/Apoa4_data_filtered_plus_functional_parameter_plus_manual_measurements.csv'
 'Arvcf/Arvcf_data_filtered_plus_functional_parameter_plus_manual_measurements.csv'
 'Bsph2/Bsph2_data_filtered_plus_functional_parameter_plus_manual_measurements.csv'
 'Cenpv/Cenpv_data_filtered_plus_functional_parameter_plus_manual_measurements.csv'
 'Cisd1/Cisd1_data_filtered_plus_functional_parameter_plus_manual_measurements.csv'
 'Cmas/Cmas_data_filtered_plus_functional_parameter_plus_manual_measurements.csv'
 'Cnot6l/Cnot6l_data_filtered_plus_functional_parameter_plus_manual_measurements.csv'
 'Crygn/Crygn_data_filtered_plus_functional_parameter_plus_manual_measurements.csv'
 'Dmd/Dmd_data_filtered_plus_functional_parameter_plus_manual_measurements

In [12]:
# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Stack the per-gene tables and drop rows that repeat the same automatic
# measurement (same file, values, time stamps, mouse and heart rate).
combined_data = pd.concat(
    [pd.read_csv(csv_path) for csv_path in filenames]
).drop_duplicates(
    subset=[
        'file',
        'LVIDd_auto', 'LVIDd_auto_time',
        'LVIDs_auto', 'LVIDs_auto_time',
        'mouse_id', 'heart_rate_auto',
    ]
)
combined_data

Unnamed: 0,mouse_id,file,LVIDd_auto,LVIDd_auto_time,LVIDs_auto,LVIDs_auto_time,heart_rate_auto,heart_rate_auto_time,LVIDd_auto_quality_label,LVIDd_auto_quality_flag,LVIDs_auto_quality_label,LVIDs_auto_quality_flag,LV_Vold_auto,LV_Vols_auto,EF_auto,FS_auto,strain,sex,genotype,type,bw,EF_manual,FS_manual,LVIDd_manual,LVIDs_manual,dif_LVIDd,mean_LVIDd,dif_LVIDs,mean_LVIDs,dif_EF,dif_FS
0,30514245,Acnat2_1vX.dcm,2.724419,0.057500,0.846221,0.015000,750.000000,0.057500,0.999722,high,0.999722,high,27.623261,1.306686,95.269617,68.939394,C57BL/6N,m,mutant,het (Acnat2),25.1,92.55,63.40,2.67,1.00,0.054419,2.697209,-0.153779,0.923110,2.719617,5.539394
1,30514245,Acnat2_1vX.dcm,2.641860,0.137500,0.990698,0.100000,692.307692,0.137500,0.999722,high,0.999722,high,25.599816,2.007393,92.158565,62.500000,C57BL/6N,m,mutant,het (Acnat2),25.1,92.55,63.40,2.67,1.00,-0.028140,2.655930,-0.009302,0.995349,-0.391435,-0.900000
2,30514245,Acnat2_1vX.dcm,2.621221,0.224167,0.846221,0.178333,765.957447,0.224167,0.999945,high,0.999722,high,25.107276,1.306686,94.795588,67.716535,C57BL/6N,m,mutant,het (Acnat2),25.1,92.55,63.40,2.67,1.00,-0.048779,2.645610,-0.153779,0.923110,2.245588,4.316535
3,30514245,Acnat2_1vX.dcm,2.683140,0.302500,1.114535,0.261667,720.000000,0.302500,0.999945,high,0.999945,high,26.600867,2.757472,89.633900,58.461538,C57BL/6N,m,mutant,het (Acnat2),25.1,92.55,63.40,2.67,1.00,0.013140,2.676570,0.114535,1.057267,-2.916100,-4.938462
4,30514245,Acnat2_1vX.dcm,2.600581,0.385833,0.970058,0.344167,692.307692,0.385833,0.999945,high,0.999945,high,24.620048,1.896068,92.298682,62.698413,C57BL/6N,m,mutant,het (Acnat2),25.1,92.55,63.40,2.67,1.00,-0.069419,2.635291,-0.029942,0.985029,-0.251318,-0.701587
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4132,30520010,Zfp280d_2v2_108.dcm,2.683140,4.685833,1.279651,4.394167,705.882353,4.685833,0.902861,high,0.998531,high,26.600867,3.986265,85.014529,52.307692,C57BL/6N,f,mutant,hom (Zfp280d),21.5,65.53,35.00,3.26,2.01,-0.576860,2.971570,-0.730349,1.644826,19.484529,17.307692
4133,30520010,Zfp280d_2v2_108.dcm,3.034012,4.770833,1.300291,4.475833,750.000000,4.770833,0.902861,high,0.998531,high,35.977352,4.158949,88.440092,57.142857,C57BL/6N,f,mutant,hom (Zfp280d),21.5,65.53,35.00,3.26,2.01,-0.225988,3.147006,-0.709709,1.655145,22.910092,22.142857
4134,30520106,Zfp280d_2v2_48.dcm,2.579942,3.185833,1.444767,3.412500,699.029126,3.185833,0.571836,high,0.935106,high,24.138125,5.490625,77.253306,44.000000,C57BL/6N,f,mutant,hom (Zfp280d),18.8,85.63,53.17,2.75,1.33,-0.170058,2.664971,0.114767,1.387384,-8.376694,-9.170000
4135,30520106,Zfp280d_2v2_48.dcm,2.868895,3.271667,1.031977,3.501667,590.163934,3.271667,0.571836,high,0.935106,high,31.370583,2.241627,92.854365,64.028777,C57BL/6N,f,mutant,hom (Zfp280d),18.8,85.63,53.17,2.75,1.33,0.118895,2.809448,-0.298023,1.180988,7.224365,10.858777


In [13]:
# Save the pooled table of all lines without the index. The 'utf-8-sig'
# encoding prepends a UTF-8 byte-order mark to the file.
# NOTE(review): presumably chosen so spreadsheet software detects the encoding — confirm.
combined_data.to_csv( "data_filtered_plus_functional_parameter_plus_manual_measurements_all_lines.csv", encoding='utf-8-sig', index=False)

In [14]:
# Paths of the per-gene image-quality tables, in the order of `genes`.
# Built as a plain Python list: the previous `filenames is None` branch could
# never run (the variable started as []), and np.append re-allocated the
# collection on every iteration.
filenames = [
    os.path.join(gene, gene + '_image_quality_per_file.csv')
    for gene in genes
]
print(filenames)

['Acnat2/Acnat2_image_quality_per_file.csv'
 'Acsf3/Acsf3_image_quality_per_file.csv'
 'Anpep/Anpep_image_quality_per_file.csv'
 'Apoa4/Apoa4_image_quality_per_file.csv'
 'Arvcf/Arvcf_image_quality_per_file.csv'
 'Bsph2/Bsph2_image_quality_per_file.csv'
 'Cenpv/Cenpv_image_quality_per_file.csv'
 'Cisd1/Cisd1_image_quality_per_file.csv'
 'Cmas/Cmas_image_quality_per_file.csv'
 'Cnot6l/Cnot6l_image_quality_per_file.csv'
 'Crygn/Crygn_image_quality_per_file.csv'
 'Dmd/Dmd_image_quality_per_file.csv'
 'Dnajb14/Dnajb14_image_quality_per_file.csv'
 'Echs1/Echs1_image_quality_per_file.csv'
 'Ergic2/Ergic2_image_quality_per_file.csv'
 'Fabp2/Fabp2_image_quality_per_file.csv'
 'Foxd2/Foxd2_image_quality_per_file.csv'
 'Gatb/Gatb_image_quality_per_file.csv'
 'Gins2/Gins2_image_quality_per_file.csv'
 'Gstm1/Gstm1_image_quality_per_file.csv'
 'Gstp1/Gstp1_image_quality_per_file.csv'
 'Ibtk/Ibtk_image_quality_per_file.csv'
 'Kansl1l/Kansl1l_image_quality_per_file.csv'
 'Klotho/Klotho_image_quality_

In [15]:
# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Stack the per-gene image-quality tables into one frame; each file keeps
# its own row index.
combined_image_quality = pd.concat(list(map(pd.read_csv, filenames)))
combined_image_quality

Unnamed: 0,mouse_id,file_name,high_image_quality,low_image_quality
0,30514213,Acnat2_1vX_111,75.299760,24.700240
1,30514213,Acnat2_1vX_117,45.879733,54.120267
2,30514213,Acnat2_1vX_118,78.828366,21.171634
3,30514213,Acnat2_1vX_116,71.766318,28.233682
4,30514214,Acnat2_1vX_129,17.677287,82.322713
...,...,...,...,...
96,30520204,Zfp280d_2v2_127,100.000000,0.000000
97,30520204,Zfp280d_2v2_126,57.656732,42.343268
98,30520206,Zfp280d_2v2_37,31.192189,68.807811
99,30520206,Zfp280d_2v2_34,0.000000,100.000000


In [16]:
# Save the pooled image-quality table without the index. The 'utf-8-sig'
# encoding prepends a UTF-8 byte-order mark to the file.
# NOTE(review): presumably chosen so spreadsheet software detects the encoding — confirm.
combined_image_quality.to_csv( "image_quality_all_files.csv", encoding='utf-8-sig', index=False)