## Sensitivity Calculation
![Sensitivoty_Calculation](sensitivity_calculation.png)

### Ground Truth Input Structure
[start, end, length, copy_number]

In [38]:
ground_truths = [[4546217, 4712431, 166215, 8],
 [4712432, 4713331, 900, 8],
 [9423992, 9461650, 37659, 6],
 [13230409, 13395982, 165574, 5],
 [16742495, 16774178, 31684, 2],
 [21248357, 21381356, 133000, 8],
 [27880363, 28164341, 283979, 3],
 [31151252, 31361623, 210372, 7],
 [39891284, 39896951, 5668, 8],
 [43904663, 43911760, 7098, 7],
 [47552110, 47662433, 110324, 6]]

### CNVPytor Result Input Structure
[type, chromosome, start, end, length, cnv_level, q0]

In [39]:
cnv_pytor_calls = [
    ['duplication', 'chr22', 1306001, 1325000, 19000, 1.3450524335588785, 0.0],
    ['duplication', 'chr22', 2926001, 2963000, 37000, 3.0065909463957716, 0.0],
    ['duplication', 'chr22', 4546001, 4713000, 167000, 1.603548412692582, 0.0],
    ['duplication', 'chr22', 6655001, 6662000, 7000, 2.304815518314925, 0.0],
    ['duplication', 'chr22', 8040001, 8048000, 8000, 1.8758709383896488, 0.0],
    ['duplication', 'chr22', 9424001, 9462000, 38000, 1.4274356086929143, 0.0],
    ['duplication', 'chr22', 13231001, 13396000, 165000, 1.3551499097181512, 0.0],
    ['duplication', 'chr22', 13481001, 13514000, 33000, 3.9775744327389453, 0.0],
    ['duplication', 'chr22', 18705001, 18881000, 176000, 2.8307061227598482, 0.0001409699844933017],
    ['duplication', 'chr22', 19259001, 19268000, 9000, 3.299749801238494, 0.0],
    ['duplication', 'chr22', 20609001, 20637000, 28000, 1.4968209446549812, 0.00019653186764233822],
    ['duplication', 'chr22', 21248001, 21381000, 133000, 1.5518808805623976, 0.00012213659384936043],
    ['duplication', 'chr22', 21497001, 21624000, 127000, 1.3935199965360836, 0.0004641057787399061],
    ['duplication', 'chr22', 21651001, 21670000, 19000, 1.392935010280004, 0.0004288164665523156],
    ['duplication', 'chr22', 23608001, 23613000, 5000, 2.2395907093921554, 0.0],
    ['duplication', 'chr22', 24196001, 24241000, 45000, 2.605180827195409, 5.622610390584002e-05],
    ['duplication', 'chr22', 25148001, 25312000, 164000, 1.319851913126888, 0.00013501034867059386],
    ['duplication', 'chr22', 28466001, 28599000, 133000, 3.1120633931386754, 4.164027770035522e-05],
    ['duplication', 'chr22', 28979001, 28986000, 7000, 1.6285899033020252, 0.00014628794343532855],
    ['duplication', 'chr22', 30804001, 30979000, 175000, 3.10641204117398, 2.22266563279038e-05],
    ['duplication', 'chr22', 31151001, 31362000, 211000, 1.4792009818563616, 4.94008819821751e-05],
    ['duplication', 'chr22', 31875001, 32248000, 373000, 1.3163872423636338, 8.079583901429077e-05],
    ['duplication', 'chr22', 35940001, 36200000, 260000, 2.274153375779643, 3.832388166707016e-05],
    ['duplication', 'chr22', 36466001, 36571000, 105000, 1.4034107114914411, 0.00011599756030937672],
    ['duplication', 'chr22', 37688001, 37697000, 9000, 1.6320950738500621, 0.0],
    ['duplication', 'chr22', 40850001, 40868000, 18000, 1.4794298343926993, 2.0793695351569404e-05],
    ['duplication', 'chr22', 43505001, 43899000, 394000, 1.466877844689757, 0.00027259394434445654],
    ['duplication', 'chr22', 44149001, 44465000, 316000, 2.2604710289320713, 3.5430020318346436e-05],
    ['duplication', 'chr22', 45430001, 45463000, 33000, 1.5578689660366496, 9.606045404574612e-05],
    ['duplication', 'chr22', 46380001, 46888000, 508000, 1.607354194514808, 4.951160813034132e-05],
    ['duplication', 'chr22', 47552001, 47662000, 110000, 1.3947272296965654, 1.7921436008824514e-05]
]

In [40]:
def get_length_category(original_call_size: int)->str:
    if original_call_size < 1000:
        return "< 1K"
    elif original_call_size < 10000:
        return "1K - 10K"
    elif original_call_size < 100000:
        return "10K - 100K"
    elif original_call_size < 1000000:
        return "100K - 1M"
    else:
        return "> 1M"

In [41]:
def get_event_category(original_call_cnv: str)->str:
    if original_call_cnv == 0:
        return "hetelogygous_deletion"
    
    elif original_call_cnv == 2:
        return "amplification"
    
    elif original_call_cnv >= 3:
        return "high_level_amplification"

In [42]:
def get_overlap(cnv_start, cnv_end, gt_start, gt_end):
    return max(0, min(cnv_end, gt_end) - max(cnv_start, gt_start))

In [43]:
ground_truth_len_dict = dict()
overlap_len_dict = dict()


ground_truth_event_dict = dict()
overlap_event_dict = dict()

In [44]:
for ground_truth in ground_truths:
    len_category = get_length_category(ground_truth[2])
    event_category = get_event_category(ground_truth[3])
    ground_truth_start = ground_truth[0]
    ground_truth_end = ground_truth[1]
    ground_truth_len = ground_truth[2]
    ground_truth_copy_number = ground_truth[3]

    ground_truth_len_dict[len_category] = (ground_truth_len_dict.get(len_category, 0) + ground_truth[2]) * ground_truth_copy_number
    ground_truth_event_dict[event_category] = (ground_truth_event_dict.get(event_category, 0) + ground_truth[2]) * ground_truth_copy_number

    for call in cnv_pytor_calls:
        call_start = call[2]
        call_end = call[3]
        call_len = call[4]
        call_copy_number = int(call[5])
        overlap = 0

        if call[0] == "deletion" and ground_truth[3] == 0:
            overlap = get_overlap(call_start, call_end, ground_truth_start, ground_truth_end)

        elif call[0] == "duplication" and ground_truth[3] != 0:
            overlap = get_overlap(call_start, call_end, ground_truth_start, ground_truth_end) * min(call_copy_number, ground_truth_copy_number)


        if overlap > 0:
            overlap_len_dict[len_category] = overlap_len_dict.get(len_category, 0) + overlap
            overlap_event_dict[event_category] = overlap_event_dict.get(event_category, 0) + overlap


            print("Debug: ", call, ground_truth, overlap)
        
    

Debug:  ['duplication', 'chr22', 4546001, 4713000, 167000, 1.603548412692582, 0.0] [4546217, 4712431, 166215, 8] 166214
Debug:  ['duplication', 'chr22', 4546001, 4713000, 167000, 1.603548412692582, 0.0] [4712432, 4713331, 900, 8] 568
Debug:  ['duplication', 'chr22', 9424001, 9462000, 38000, 1.4274356086929143, 0.0] [9423992, 9461650, 37659, 6] 37649
Debug:  ['duplication', 'chr22', 13231001, 13396000, 165000, 1.3551499097181512, 0.0] [13230409, 13395982, 165574, 5] 164981
Debug:  ['duplication', 'chr22', 21248001, 21381000, 133000, 1.5518808805623976, 0.00012213659384936043] [21248357, 21381356, 133000, 8] 132643
Debug:  ['duplication', 'chr22', 31151001, 31362000, 211000, 1.4792009818563616, 4.94008819821751e-05] [31151252, 31361623, 210372, 7] 210371
Debug:  ['duplication', 'chr22', 47552001, 47662000, 110000, 1.3947272296965654, 1.7921436008824514e-05] [47552110, 47662433, 110324, 6] 109890


In [45]:
for len_category_category in ["< 1K", "1K - 10K", "10K - 100K", "100K - 1M", "> 1M"]:
    print(f"Category: {len_category_category}")
    print(f"Overlap length: {overlap_len_dict.get(len_category_category, 0)}")
    print(f"Ground truth length: {ground_truth_len_dict.get(len_category_category,0)}")
    print()

Category: < 1K
Overlap length: 568
Ground truth length: 7200

Category: 1K - 10K
Overlap length: 0
Ground truth length: 367094

Category: 10K - 100K
Overlap length: 37649
Ground truth length: 515276

Category: 100K - 1M
Overlap length: 784099
Ground truth length: 7715624682

Category: > 1M
Overlap length: 0
Ground truth length: 0



In [46]:
for event_category in ["hetelogygous_deletion", "amplification", "high_level_amplification"]:
    print(f"Category: {event_category}")
    print(f"Overlap length: {overlap_event_dict.get(event_category, 0)}")
    print(f"Ground truth length: {ground_truth_event_dict.get(event_category,0)}")
    print()

Category: hetelogygous_deletion
Overlap length: 0
Ground truth length: 0

Category: amplification
Overlap length: 0
Ground truth length: 63368

Category: high_level_amplification
Overlap length: 822316
Ground truth length: 18147114924396

