# Overlapping analysis

In [1]:
import os
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
import pandas as pd

from snl_stats_extraction_data import *
from snl_stats_visualization_database import *
DIR, databases_pair_paths, databases_paths, tier_lists, databases, databases_pairs, tiers = get_parameters()

### Parameters

In [2]:
databases_name = [key.replace('_paths','').upper() for key in databases.keys()]
databases_pairs = [key for key in databases_pairs.keys()]
expressions = ["Smiles_0", "Laughs_0"]
laughs_intensities = tier_lists['Laughs_0']
smiles_intensities = tier_lists['Smiles_0']

To explain well before starting the analysis, when I talk about overlapping between person A to person B, this means that we are looking at all the segments of person B which overlap a segment of A and this for all the segments of the tier concerned for the "A" files. When we change direction (so person B to person A), we just switched the direction of the files in the overlapping function. (A and B are the pair files)
- person A to person B: {(segmentA: (segmentB n°1), (segmentB n°N), etc),...}
- person B to person A: {(segmentB: (segmentA n°1), (segmentA n°N), etc),...}

### Is there any overlap during the interactions (between the pairs files) ?

Let's see of there is any overlap between the role of person A to person B.


In [3]:
lstA = {}
lstB = {}
overlapping_segments_dict = {}
for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}
        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1] 
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            lstA_tier = get_tier_from_file(filepath_A, "Role")
            lstB_tier = get_tier_from_file(filepath_B, "Role")

            if "Role" in lstA:
                lstA["Role"].extend(lstA_tier["Role"])
            else:
                lstA["Role"] = lstA_tier["Role"]

            if "Role" in lstB:
                lstB["Role"].extend(lstB_tier["Role"])
            else:
                lstB["Role"] = lstB_tier["Role"]

            overlapping_segments = get_overlapping_segments(lstA_tier["Role"], lstB_tier["Role"])
            pair_dict["Role"] = {'Segments': overlapping_segments}

            dataset_dict[pair_name] = pair_dict

        overlapping_segments_dict[database] = dataset_dict

overlap_segments_set = set()
overlap_count_dict = {}
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_count = 0
    overlap_count_spk_lsn = 0
    overlap_count_lsn_spk = 0
    for pair_dict in dataset_dict.values():
        segments = pair_dict["Role"]["Segments"]
        for segmentA, segmentB in segments.items():
            for segB in segmentB:
                segment_key = f"{segB}"
                if segment_key not in overlap_segments_set:
                    overlap_count += 1
                    overlap_segments_set.add(segment_key)
                    if (segmentA[2].replace(" ", "") == "spk" and segB[2].replace(" ", "") == "lsn"):
                        overlap_count_spk_lsn += 1
                    elif (segmentA[2].replace(" ", "") == "lsn" and segB[2].replace(" ", "") == "spk"):
                        overlap_count_lsn_spk += 1
                break 
        overlap_count_dict[database] = {
        'Overlap Count for Role in general': overlap_count,
        'Overlap Count for A spk / B lsn': overlap_count_spk_lsn,
        'Overlap Count for A lsn / B spk': overlap_count_lsn_spk
        }

df_overlap_count = pd.DataFrame(overlap_count_dict).T.reset_index()
df_overlap_count = df_overlap_count.rename(columns={'index': 'Database'})

display(df_overlap_count)

Unnamed: 0,Database,Overlap Count for Role in general,Overlap Count for A spk / B lsn,Overlap Count for A lsn / B spk
0,CCDB,43,12,25
1,IFADV,79,18,41
2,NDC,165,67,68


In [4]:
lstA = {}
lstB = {}
overlapping_segments_dict = {}
for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}
        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1] 
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            lstA_tier = get_tier_from_file(filepath_A, "Role")
            lstB_tier = get_tier_from_file(filepath_B, "Role")

            if "Role" in lstA:
                lstA["Role"].extend(lstA_tier["Role"])
            else:
                lstA["Role"] = lstA_tier["Role"]

            if "Role" in lstB:
                lstB["Role"].extend(lstB_tier["Role"])
            else:
                lstB["Role"] = lstB_tier["Role"]

            overlapping_segments = get_overlapping_segments(lstA_tier["Role"], lstB_tier["Role"])
            pair_dict["Role"] = {'Segments': overlapping_segments}

            dataset_dict[pair_name] = pair_dict

        overlapping_segments_dict[database] = dataset_dict
dataframes = {}
overlap_segments_set = set()
overlap_count_dict = {}
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_count_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_count = 0
        overlap_count_spk_lsn = 0
        overlap_count_lsn_spk = 0
        segments = pair_dict["Role"]["Segments"]

        for segmentA, segmentB in segments.items():
            for segB in segmentB:
                segment_key = f"{segB}"
                if segment_key not in overlap_segments_set:
                    overlap_count += 1
                    overlap_segments_set.add(segment_key)
                    if (segmentA[2].replace(" ", "") == "spk" and segB[2].replace(" ", "") == "lsn"):
                        overlap_count_spk_lsn += 1
                    elif (segmentA[2].replace(" ", "") == "lsn" and segB[2].replace(" ", "") == "spk"):
                        overlap_count_lsn_spk += 1
                break 
        overlap_count_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Count for Role in general': overlap_count,
            'Overlap Count for A spk / B lsn': overlap_count_spk_lsn,
            'Overlap Count for A lsn / B spk': overlap_count_lsn_spk
        })
    
    df_overlap_count = pd.DataFrame(overlap_count_list)
    dataframes[database] = df_overlap_count

for database, df in dataframes.items():
    display(Markdown(f"**Database: {database}**"))
    display(df)


**Database: CCDB**

Unnamed: 0,Database,Pair,Overlap Count for Role in general,Overlap Count for A spk / B lsn,Overlap Count for A lsn / B spk
0,CCDB,P12_P2_1402_dizzy.eaf_&_P12_P2_1402_monk.eaf,15,2,12
1,CCDB,P12_P3_2202_dizzy.eaf_&_P12_P3_2202_monk.eaf,2,0,1
2,CCDB,P14_P7_2502_dizzy.eaf_&_P14_P7_2502_monk.eaf,3,1,1
3,CCDB,P15_P13_2402_dizzy.eaf_&_P15_P13_2402_monk.eaf,6,2,3
4,CCDB,P16_P6_0903_dizzy.eaf_&_P16_P6_0903_monk.eaf,3,1,1
5,CCDB,P18_P10_2102_dizzy.eaf_&_P18_P10_2102_monk.eaf,6,2,4
6,CCDB,P18_P1_2102_dizzy.eaf_&_P18_P1_2102_monk.eaf,8,4,3


**Database: IFADV**

Unnamed: 0,Database,Pair,Overlap Count for Role in general,Overlap Count for A spk / B lsn,Overlap Count for A lsn / B spk
0,IFADV,DVA1A.eaf_&_DVB1B.eaf,10,1,3
1,IFADV,DVA2C.eaf_&_DVB2D.eaf,16,3,9
2,IFADV,DVA3E.eaf_&_DVB3F.eaf,6,2,4
3,IFADV,DVA4C.eaf_&_DVB4G.eaf,10,1,6
4,IFADV,DVA5G.eaf_&_DVB5H.eaf,7,1,4
5,IFADV,DVA6H.eaf_&_DVB6I.eaf,10,1,6
6,IFADV,DVA7B.eaf_&_DVB7J.eaf,14,6,7
7,IFADV,DVA8K.eaf_&_DVB8L.eaf,6,3,2


**Database: NDC**

Unnamed: 0,Database,Pair,Overlap Count for Role in general,Overlap Count for A spk / B lsn,Overlap Count for A lsn / B spk
0,NDC,13_1_A_M.eaf_&_13_1_B_F.eaf,10,3,3
1,NDC,13_2_A_M.eaf_&_13_2_B_F.eaf,9,3,3
2,NDC,13_4_A_M.eaf_&_13_4_B_F.eaf,17,4,11
3,NDC,14_1_A_M.eaf_&_14_1_B_F.eaf,10,6,4
4,NDC,14_2_A_M.eaf_&_14_2_B_F.eaf,24,9,3
5,NDC,17_1_A_F.eaf_&_17_1_B_F.eaf,4,2,2
6,NDC,17_2_A_F.eaf_&_17_2_B_F.eaf,5,3,2
7,NDC,17_3_A_F.eaf_&_17_3_B_F.eaf,9,3,4
8,NDC,17_4_A_F.eaf_&_17_4_B_F.eaf,5,3,2
9,NDC,18_1_A_M.eaf_&_18_1_B_M.eaf,24,11,12


Same things for B to A.

In [5]:
lstA = {}
lstB = {}
overlapping_segments_dict = {}
for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}
        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1] 
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            lstA_tier = get_tier_from_file(filepath_A, "Role")
            lstB_tier = get_tier_from_file(filepath_B, "Role")

            if "Role" in lstA:
                lstA["Role"].extend(lstA_tier["Role"])
            else:
                lstA["Role"] = lstA_tier["Role"]

            if "Role" in lstB:
                lstB["Role"].extend(lstB_tier["Role"])
            else:
                lstB["Role"] = lstB_tier["Role"]

            overlapping_segments = get_overlapping_segments(lstB_tier["Role"], lstA_tier["Role"])
            pair_dict["Role"] = {'Segments': overlapping_segments}

            dataset_dict[pair_name] = pair_dict

        overlapping_segments_dict[database] = dataset_dict

overlap_segments_set = set()
overlap_count_dict = {}
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_count = 0
    overlap_count_spk_lsn = 0
    overlap_count_lsn_spk = 0
    for pair_dict in dataset_dict.values():
        segments = pair_dict["Role"]["Segments"]
        for segmentB, segmentA in segments.items():
            for segA in segmentA:
                segment_key = f"{segA}"
                if segment_key not in overlap_segments_set:
                    overlap_count += 1
                    overlap_segments_set.add(segment_key)
                    if (segmentB[2].replace(" ", "") == "spk" and segA[2].replace(" ", "") == "lsn"):
                        overlap_count_spk_lsn += 1
                    elif (segmentB[2].replace(" ", "") == "lsn" and segA[2].replace(" ", "") == "spk"):
                        overlap_count_lsn_spk += 1
                break 
        overlap_count_dict[database] = {
        'Overlap Count for Role in general': overlap_count,
        'Overlap Count for B spk / A lsn': overlap_count_spk_lsn,
        'Overlap Count for B lsn / A spk': overlap_count_lsn_spk
        }

df_overlap_count = pd.DataFrame(overlap_count_dict).T.reset_index()
df_overlap_count = df_overlap_count.rename(columns={'index': 'Database'})

display(df_overlap_count)

Unnamed: 0,Database,Overlap Count for Role in general,Overlap Count for B spk / A lsn,Overlap Count for B lsn / A spk
0,CCDB,44,8,23
1,IFADV,81,14,38
2,NDC,162,57,60


In [6]:
lstA = {}
lstB = {}
overlapping_segments_dict = {}
for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}
        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1] 
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            lstA_tier = get_tier_from_file(filepath_A, "Role")
            lstB_tier = get_tier_from_file(filepath_B, "Role")

            if "Role" in lstA:
                lstA["Role"].extend(lstA_tier["Role"])
            else:
                lstA["Role"] = lstA_tier["Role"]

            if "Role" in lstB:
                lstB["Role"].extend(lstB_tier["Role"])
            else:
                lstB["Role"] = lstB_tier["Role"]

            overlapping_segments = get_overlapping_segments(lstB_tier["Role"], lstA_tier["Role"])
            pair_dict["Role"] = {'Segments': overlapping_segments}

            dataset_dict[pair_name] = pair_dict

        overlapping_segments_dict[database] = dataset_dict
dataframes = {}
overlap_segments_set = set()
overlap_count_dict = {}
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_count_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_count = 0
        overlap_count_spk_lsn = 0
        overlap_count_lsn_spk = 0
        segments = pair_dict["Role"]["Segments"]

        for segmentB, segmentA in segments.items():
            for segA in segmentA:
                segment_key = f"{segA}"
                if segment_key not in overlap_segments_set:
                    overlap_count += 1
                    overlap_segments_set.add(segment_key)
                    if (segmentB[2].replace(" ", "") == "spk" and segA[2].replace(" ", "") == "lsn"):
                        overlap_count_spk_lsn += 1
                    elif (segmentB[2].replace(" ", "") == "lsn" and segA[2].replace(" ", "") == "spk"):
                        overlap_count_lsn_spk += 1
                break 
        overlap_count_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Count for Role in general': overlap_count,
            'Overlap Count for B spk / A lsn': overlap_count_spk_lsn,
            'Overlap Count for B lsn / A spk': overlap_count_lsn_spk
        })
    
    df_overlap_count = pd.DataFrame(overlap_count_list)
    dataframes[database] = df_overlap_count

for database, df in dataframes.items():
    display(Markdown(f"**Database: {database}**"))
    display(df)

**Database: CCDB**

Unnamed: 0,Database,Pair,Overlap Count for Role in general,Overlap Count for B spk / A lsn,Overlap Count for B lsn / A spk
0,CCDB,P12_P2_1402_dizzy.eaf_&_P12_P2_1402_monk.eaf,15,1,10
1,CCDB,P12_P3_2202_dizzy.eaf_&_P12_P3_2202_monk.eaf,2,0,1
2,CCDB,P14_P7_2502_dizzy.eaf_&_P14_P7_2502_monk.eaf,3,2,1
3,CCDB,P15_P13_2402_dizzy.eaf_&_P15_P13_2402_monk.eaf,7,1,2
4,CCDB,P16_P6_0903_dizzy.eaf_&_P16_P6_0903_monk.eaf,3,1,2
5,CCDB,P18_P10_2102_dizzy.eaf_&_P18_P10_2102_monk.eaf,6,0,4
6,CCDB,P18_P1_2102_dizzy.eaf_&_P18_P1_2102_monk.eaf,8,3,3


**Database: IFADV**

Unnamed: 0,Database,Pair,Overlap Count for Role in general,Overlap Count for B spk / A lsn,Overlap Count for B lsn / A spk
0,IFADV,DVA1A.eaf_&_DVB1B.eaf,11,3,5
1,IFADV,DVA2C.eaf_&_DVB2D.eaf,15,1,8
2,IFADV,DVA3E.eaf_&_DVB3F.eaf,6,0,3
3,IFADV,DVA4C.eaf_&_DVB4G.eaf,10,3,6
4,IFADV,DVA5G.eaf_&_DVB5H.eaf,8,2,4
5,IFADV,DVA6H.eaf_&_DVB6I.eaf,11,3,6
6,IFADV,DVA7B.eaf_&_DVB7J.eaf,14,1,4
7,IFADV,DVA8K.eaf_&_DVB8L.eaf,6,1,2


**Database: NDC**

Unnamed: 0,Database,Pair,Overlap Count for Role in general,Overlap Count for B spk / A lsn,Overlap Count for B lsn / A spk
0,NDC,13_1_A_M.eaf_&_13_1_B_F.eaf,9,2,1
1,NDC,13_2_A_M.eaf_&_13_2_B_F.eaf,10,3,4
2,NDC,13_4_A_M.eaf_&_13_4_B_F.eaf,17,3,9
3,NDC,14_1_A_M.eaf_&_14_1_B_F.eaf,10,2,3
4,NDC,14_2_A_M.eaf_&_14_2_B_F.eaf,27,14,10
5,NDC,17_1_A_F.eaf_&_17_1_B_F.eaf,4,2,2
6,NDC,17_2_A_F.eaf_&_17_2_B_F.eaf,5,2,3
7,NDC,17_3_A_F.eaf_&_17_3_B_F.eaf,8,3,3
8,NDC,17_4_A_F.eaf_&_17_4_B_F.eaf,6,1,1
9,NDC,18_1_A_M.eaf_&_18_1_B_M.eaf,24,7,5


## Overlapping analysis with the count of overlap

### Overlap of person A compared to person B

Is there any overlap between smiles and laughs of person A being speaker to person B being listener? The other way ?

In [7]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            overlapping_data = {}

            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]

                overlapping_segments = get_overlapping_segments(lstA_tier[tier_A], lstB_tier[tier_B])
                overlapping_data[f"{tier_A} vs {tier_B}"] = {'Segments': overlapping_segments}

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict
dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_count_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_count_spk_vs_lsn = 0
        overlap_count_lsn_vs_spk = 0
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentA, segmentB in segments.items():
            for segB in segmentB:
                segment_key = f"{segB}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    
                    # Check if A is "spk" and B is "lsn"
                    if (segmentA[2].replace(" ", "") == "spk" and segB[2].replace(" ", "") == "lsn"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                overlap_count_spk_vs_lsn += 1
                    # Check if A is "lsn" and B is "spk"
                    elif (segmentA[2].replace(" ", "") == "lsn" and segB[2].replace(" ", "") == "spk"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                overlap_count_lsn_vs_spk += 1
                
                break
        overlap_count_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Count for A spk / B lsn - S&L': overlap_count_spk_vs_lsn,
            'Overlap Count for A lsn / B spk - S&L': overlap_count_lsn_vs_spk
        }) 
    df_overlap_count = pd.DataFrame(overlap_count_list)
    dataframes[database] = df_overlap_count

for database, df in dataframes.items():
    display(Markdown(f"**Database: {database}**"))
    display(df)

**Database: CCDB**

Unnamed: 0,Database,Pair,Overlap Count for A spk / B lsn - S&L,Overlap Count for A lsn / B spk - S&L
0,CCDB,P12_P2_1402_dizzy.eaf_&_P12_P2_1402_monk.eaf,1,37
1,CCDB,P12_P3_2202_dizzy.eaf_&_P12_P3_2202_monk.eaf,0,5
2,CCDB,P14_P7_2502_dizzy.eaf_&_P14_P7_2502_monk.eaf,1,4
3,CCDB,P15_P13_2402_dizzy.eaf_&_P15_P13_2402_monk.eaf,3,2
4,CCDB,P16_P6_0903_dizzy.eaf_&_P16_P6_0903_monk.eaf,1,12
5,CCDB,P18_P10_2102_dizzy.eaf_&_P18_P10_2102_monk.eaf,7,12
6,CCDB,P18_P1_2102_dizzy.eaf_&_P18_P1_2102_monk.eaf,9,11


**Database: IFADV**

Unnamed: 0,Database,Pair,Overlap Count for A spk / B lsn - S&L,Overlap Count for A lsn / B spk - S&L
0,IFADV,DVA1A.eaf_&_DVB1B.eaf,2,4
1,IFADV,DVA2C.eaf_&_DVB2D.eaf,2,25
2,IFADV,DVA3E.eaf_&_DVB3F.eaf,2,17
3,IFADV,DVA4C.eaf_&_DVB4G.eaf,2,8
4,IFADV,DVA5G.eaf_&_DVB5H.eaf,1,4
5,IFADV,DVA6H.eaf_&_DVB6I.eaf,1,4
6,IFADV,DVA7B.eaf_&_DVB7J.eaf,4,7
7,IFADV,DVA8K.eaf_&_DVB8L.eaf,4,4


**Database: NDC**

Unnamed: 0,Database,Pair,Overlap Count for A spk / B lsn - S&L,Overlap Count for A lsn / B spk - S&L
0,NDC,13_1_A_M.eaf_&_13_1_B_F.eaf,0,1
1,NDC,13_2_A_M.eaf_&_13_2_B_F.eaf,3,31
2,NDC,13_4_A_M.eaf_&_13_4_B_F.eaf,5,55
3,NDC,14_1_A_M.eaf_&_14_1_B_F.eaf,11,2
4,NDC,14_2_A_M.eaf_&_14_2_B_F.eaf,13,3
5,NDC,17_1_A_F.eaf_&_17_1_B_F.eaf,9,3
6,NDC,17_2_A_F.eaf_&_17_2_B_F.eaf,4,13
7,NDC,17_3_A_F.eaf_&_17_3_B_F.eaf,4,10
8,NDC,17_4_A_F.eaf_&_17_4_B_F.eaf,3,42
9,NDC,18_1_A_M.eaf_&_18_1_B_M.eaf,31,19


Percentage compared to the total number of smiles and laughs for each files:

In [8]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstA_tier[tier_A], lstB_tier[tier_B])
                overlapping_data[f"{tier_A} vs {tier_B}"] = {'Segments': overlapping_segments}

                overlapping_data[f"{tier_B} count in lstB"] = 0

                tiers_in_lstB = set(lstB_tier[tier_B])
                overlapping_data[f"{tier_B} count in lstB"] = len(tiers_in_lstB)

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_count_spk_vs_lsn = 0
        overlap_count_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        count_smiles = pair_dict["Smiles_0 count in lstB"]
        count_laughs = pair_dict["Laughs_0 count in lstB"]
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentA, segmentB in segments.items():
            for segB in segmentB:
                segment_key = f"{segB}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    
                    # Check if A is "spk" and B is "lsn"
                    if (segmentA[2].replace(" ", "") == "spk" and segB[2].replace(" ", "") == "lsn"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                overlap_count_spk_vs_lsn += 1
                    # Check if A is "lsn" and B is "spk"
                    elif (segmentA[2].replace(" ", "") == "lsn" and segB[2].replace(" ", "") == "spk"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                overlap_count_lsn_vs_spk += 1
                
                break
        if count_smiles != 0 or count_laughs != 0:
            percentage_spk_vs_lsn = overlap_count_spk_vs_lsn / (count_smiles + count_laughs) * 100        
            percentage_lsn_vs_spk= overlap_count_lsn_vs_spk / (count_smiles + count_laughs) * 100
            
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Percentage for A spk / B lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Percentage for A lsn / B spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Percentage for A spk / B lsn - S&L,Overlap Percentage for A lsn / B spk - S&L
0,CCDB,P12_P2_1402_dizzy.eaf_&_P12_P2_1402_monk.eaf,1.818182,67.272727
1,CCDB,P12_P3_2202_dizzy.eaf_&_P12_P3_2202_monk.eaf,0.0,12.5
2,CCDB,P14_P7_2502_dizzy.eaf_&_P14_P7_2502_monk.eaf,5.0,20.0
3,CCDB,P15_P13_2402_dizzy.eaf_&_P15_P13_2402_monk.eaf,14.285714,9.52381
4,CCDB,P16_P6_0903_dizzy.eaf_&_P16_P6_0903_monk.eaf,4.761905,57.142857
5,CCDB,P18_P10_2102_dizzy.eaf_&_P18_P10_2102_monk.eaf,29.166667,50.0
6,CCDB,P18_P1_2102_dizzy.eaf_&_P18_P1_2102_monk.eaf,29.032258,35.483871






### IFADV

Unnamed: 0,Database,Pair,Overlap Percentage for A spk / B lsn - S&L,Overlap Percentage for A lsn / B spk - S&L
0,IFADV,DVA1A.eaf_&_DVB1B.eaf,5.555556,11.111111
1,IFADV,DVA2C.eaf_&_DVB2D.eaf,4.347826,54.347826
2,IFADV,DVA3E.eaf_&_DVB3F.eaf,3.921569,33.333333
3,IFADV,DVA4C.eaf_&_DVB4G.eaf,3.773585,15.09434
4,IFADV,DVA5G.eaf_&_DVB5H.eaf,4.761905,19.047619
5,IFADV,DVA6H.eaf_&_DVB6I.eaf,3.703704,14.814815
6,IFADV,DVA7B.eaf_&_DVB7J.eaf,8.0,14.0
7,IFADV,DVA8K.eaf_&_DVB8L.eaf,7.272727,7.272727






### NDC

Unnamed: 0,Database,Pair,Overlap Percentage for A spk / B lsn - S&L,Overlap Percentage for A lsn / B spk - S&L
0,NDC,13_1_A_M.eaf_&_13_1_B_F.eaf,0.0,2.439024
1,NDC,13_2_A_M.eaf_&_13_2_B_F.eaf,4.225352,43.661972
2,NDC,13_4_A_M.eaf_&_13_4_B_F.eaf,3.703704,40.740741
3,NDC,14_1_A_M.eaf_&_14_1_B_F.eaf,22.0,4.0
4,NDC,14_2_A_M.eaf_&_14_2_B_F.eaf,12.264151,2.830189
5,NDC,17_1_A_F.eaf_&_17_1_B_F.eaf,56.25,18.75
6,NDC,17_2_A_F.eaf_&_17_2_B_F.eaf,22.222222,72.222222
7,NDC,17_3_A_F.eaf_&_17_3_B_F.eaf,14.814815,37.037037
8,NDC,17_4_A_F.eaf_&_17_4_B_F.eaf,5.172414,72.413793
9,NDC,18_1_A_M.eaf_&_18_1_B_M.eaf,41.333333,25.333333






Let's consider now that we want to see the number of overlap between smiles and smiles, laughs and laughs, smiles and laughs, laughs and smiles.

In [9]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            overlapping_data = {}

            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]

                overlapping_segments = get_overlapping_segments(lstA_tier[tier_A], lstB_tier[tier_B])
                overlapping_data[f"{tier_A} vs {tier_B}"] = {'Segments': overlapping_segments}

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict
dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_count_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_count_spk_vs_lsn_Smiles_0_vs_Smiles_0 = 0
        overlap_count_spk_vs_lsn_Laughs_0_vs_Laughs_0 = 0
        overlap_count_lsn_vs_spk_Smiles_0_vs_Smiles_0 = 0
        overlap_count_lsn_vs_spk_Laughs_0_vs_Laughs_0 = 0
        overlap_count_lsn_vs_spk_Smiles_0_vs_Laughs_0 = 0
        overlap_count_spk_vs_lsn_Smiles_0_vs_Laughs_0 = 0
        overlap_count_lsn_vs_spk_Laughs_0_vs_Smiles_0 = 0
        overlap_count_spk_vs_lsn_Laughs_0_vs_Smiles_0 = 0
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentA, segmentB in segments.items():
            for segB in segmentB:
                segment_key = f"{segB}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    
                    # Check if A is "spk" and B is "lsn"
                    if (segmentA[2].replace(" ", "") == "spk" and segB[2].replace(" ", "") == "lsn"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                globals()[f"overlap_count_spk_vs_lsn_{tierA}_vs_{tierB}"] += 1
                    # Check if A is "lsn" and B is "spk"
                    elif (segmentA[2].replace(" ", "") == "lsn" and segB[2].replace(" ", "") == "spk"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                globals()[f"overlap_count_lsn_vs_spk_{tierA}_vs_{tierB}"] += 1
                
                break 
        overlap_count_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Count for A spk / B lsn - Smiles_0 vs Smiles_0': overlap_count_spk_vs_lsn_Smiles_0_vs_Smiles_0,
            'Overlap Count for A spk / B lsn - Laughs_0 vs Laughs_0': overlap_count_spk_vs_lsn_Laughs_0_vs_Laughs_0,
            'Overlap Count for A spk / B lsn - Smiles_0 vs Laughs_0': overlap_count_spk_vs_lsn_Smiles_0_vs_Laughs_0,
            'Overlap Count for A spk / B lsn - Laughs_0 vs Smiles_0': overlap_count_spk_vs_lsn_Laughs_0_vs_Smiles_0,
            'Overlap Count for A lsn / B spk - Smiles_0 vs Smiles_0': overlap_count_lsn_vs_spk_Smiles_0_vs_Smiles_0,
            'Overlap Count for A lsn / B spk - Laughs_0 vs Laughs_0': overlap_count_lsn_vs_spk_Laughs_0_vs_Laughs_0,
            'Overlap Count for A lsn / B spk - Smiles_0 vs Laughs_0': overlap_count_lsn_vs_spk_Smiles_0_vs_Laughs_0,
            'Overlap Count for A lsn / B spk - Laughs_0 vs Smiles_0': overlap_count_lsn_vs_spk_Laughs_0_vs_Smiles_0
        })
    df_overlap_count = pd.DataFrame(overlap_count_list)
    dataframes[database] = df_overlap_count

for database, df in dataframes.items():
    display(Markdown(f"**Database: {database}**"))
    display(df)

**Database: CCDB**

Unnamed: 0,Database,Pair,Overlap Count for A spk / B lsn - Smiles_0 vs Smiles_0,Overlap Count for A spk / B lsn - Laughs_0 vs Laughs_0,Overlap Count for A spk / B lsn - Smiles_0 vs Laughs_0,Overlap Count for A spk / B lsn - Laughs_0 vs Smiles_0,Overlap Count for A lsn / B spk - Smiles_0 vs Smiles_0,Overlap Count for A lsn / B spk - Laughs_0 vs Laughs_0,Overlap Count for A lsn / B spk - Smiles_0 vs Laughs_0,Overlap Count for A lsn / B spk - Laughs_0 vs Smiles_0
0,CCDB,P12_P2_1402_dizzy.eaf_&_P12_P2_1402_monk.eaf,1,0,0,0,33,1,2,1
1,CCDB,P12_P3_2202_dizzy.eaf_&_P12_P3_2202_monk.eaf,0,0,0,0,3,0,2,0
2,CCDB,P14_P7_2502_dizzy.eaf_&_P14_P7_2502_monk.eaf,1,0,0,0,4,0,0,0
3,CCDB,P15_P13_2402_dizzy.eaf_&_P15_P13_2402_monk.eaf,3,0,0,0,2,0,0,0
4,CCDB,P16_P6_0903_dizzy.eaf_&_P16_P6_0903_monk.eaf,1,0,0,0,12,0,0,0
5,CCDB,P18_P10_2102_dizzy.eaf_&_P18_P10_2102_monk.eaf,6,0,1,0,12,0,0,0
6,CCDB,P18_P1_2102_dizzy.eaf_&_P18_P1_2102_monk.eaf,8,0,1,0,11,0,0,0


**Database: IFADV**

Unnamed: 0,Database,Pair,Overlap Count for A spk / B lsn - Smiles_0 vs Smiles_0,Overlap Count for A spk / B lsn - Laughs_0 vs Laughs_0,Overlap Count for A spk / B lsn - Smiles_0 vs Laughs_0,Overlap Count for A spk / B lsn - Laughs_0 vs Smiles_0,Overlap Count for A lsn / B spk - Smiles_0 vs Smiles_0,Overlap Count for A lsn / B spk - Laughs_0 vs Laughs_0,Overlap Count for A lsn / B spk - Smiles_0 vs Laughs_0,Overlap Count for A lsn / B spk - Laughs_0 vs Smiles_0
0,IFADV,DVA1A.eaf_&_DVB1B.eaf,2,0,0,0,4,0,0,0
1,IFADV,DVA2C.eaf_&_DVB2D.eaf,2,0,0,0,23,0,2,0
2,IFADV,DVA3E.eaf_&_DVB3F.eaf,2,0,0,0,11,4,0,2
3,IFADV,DVA4C.eaf_&_DVB4G.eaf,2,0,0,0,8,0,0,0
4,IFADV,DVA5G.eaf_&_DVB5H.eaf,1,0,0,0,4,0,0,0
5,IFADV,DVA6H.eaf_&_DVB6I.eaf,1,0,0,0,4,0,0,0
6,IFADV,DVA7B.eaf_&_DVB7J.eaf,4,0,0,0,7,0,0,0
7,IFADV,DVA8K.eaf_&_DVB8L.eaf,2,0,0,2,0,1,0,3


**Database: NDC**

Unnamed: 0,Database,Pair,Overlap Count for A spk / B lsn - Smiles_0 vs Smiles_0,Overlap Count for A spk / B lsn - Laughs_0 vs Laughs_0,Overlap Count for A spk / B lsn - Smiles_0 vs Laughs_0,Overlap Count for A spk / B lsn - Laughs_0 vs Smiles_0,Overlap Count for A lsn / B spk - Smiles_0 vs Smiles_0,Overlap Count for A lsn / B spk - Laughs_0 vs Laughs_0,Overlap Count for A lsn / B spk - Smiles_0 vs Laughs_0,Overlap Count for A lsn / B spk - Laughs_0 vs Smiles_0
0,NDC,13_1_A_M.eaf_&_13_1_B_F.eaf,0,0,0,0,1,0,0,0
1,NDC,13_2_A_M.eaf_&_13_2_B_F.eaf,3,0,0,0,29,0,2,0
2,NDC,13_4_A_M.eaf_&_13_4_B_F.eaf,5,0,0,0,44,6,5,0
3,NDC,14_1_A_M.eaf_&_14_1_B_F.eaf,11,0,0,0,2,0,0,0
4,NDC,14_2_A_M.eaf_&_14_2_B_F.eaf,12,0,1,0,3,0,0,0
5,NDC,17_1_A_F.eaf_&_17_1_B_F.eaf,7,1,0,1,1,1,0,1
6,NDC,17_2_A_F.eaf_&_17_2_B_F.eaf,3,0,1,0,10,2,1,0
7,NDC,17_3_A_F.eaf_&_17_3_B_F.eaf,3,0,0,1,6,2,1,1
8,NDC,17_4_A_F.eaf_&_17_4_B_F.eaf,2,1,0,0,40,1,1,0
9,NDC,18_1_A_M.eaf_&_18_1_B_M.eaf,28,1,1,1,18,0,1,0


Let's see now for the total number of overlap between the pairs files and the percentage of overlap.

In [10]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstA_tier[tier_A], lstB_tier[tier_B])
                overlapping_data[f"{tier_A} vs {tier_B}"] = {'Segments': overlapping_segments}

                overlapping_data[f"{tier_B} count in lstB"] = 0

                tiers_in_lstB = set(lstB_tier[tier_B])
                overlapping_data[f"{tier_B} count in lstB"] = len(tiers_in_lstB)

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_count_spk_vs_lsn_Smiles_0_vs_Smiles_0 = 0
        overlap_count_spk_vs_lsn_Laughs_0_vs_Laughs_0 = 0
        overlap_count_lsn_vs_spk_Smiles_0_vs_Smiles_0 = 0
        overlap_count_lsn_vs_spk_Laughs_0_vs_Laughs_0 = 0
        overlap_count_lsn_vs_spk_Smiles_0_vs_Laughs_0 = 0
        overlap_count_spk_vs_lsn_Smiles_0_vs_Laughs_0 = 0
        overlap_count_lsn_vs_spk_Laughs_0_vs_Smiles_0 = 0
        overlap_count_spk_vs_lsn_Laughs_0_vs_Smiles_0 = 0
        percentage_spk_vs_lsn_Smiles_0_vs_Smiles_0 = 0
        percentage_spk_vs_lsn_Laughs_0_vs_Laughs_0 = 0
        percentage_lsn_vs_spk_Smiles_0_vs_Smiles_0 = 0
        percentage_lsn_vs_spk_Laughs_0_vs_Laughs_0 = 0
        percentage_lsn_vs_spk_Smiles_0_vs_Laughs_0 = 0
        percentage_spk_vs_lsn_Smiles_0_vs_Laughs_0 = 0
        percentage_lsn_vs_spk_Laughs_0_vs_Smiles_0 = 0
        percentage_spk_vs_lsn_Laughs_0_vs_Smiles_0 = 0
        count_smiles = pair_dict["Smiles_0 count in lstB"]
        count_laughs = pair_dict["Laughs_0 count in lstB"]
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentA, segmentB in segments.items():
            for segB in segmentB:
                segment_key = f"{segB}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    
                    # Check if A is "spk" and B is "lsn"
                    if (segmentA[2].replace(" ", "") == "spk" and segB[2].replace(" ", "") == "lsn"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                globals()[f"overlap_count_spk_vs_lsn_{tierA}_vs_{tierB}"] += 1
                    # Check if A is "lsn" and B is "spk"
                    elif (segmentA[2].replace(" ", "") == "lsn" and segB[2].replace(" ", "") == "spk"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                globals()[f"overlap_count_lsn_vs_spk_{tierA}_vs_{tierB}"] += 1
                
                break
        if count_smiles != 0:
            percentage_spk_vs_lsn_Smiles_0_vs_Smiles_0 = overlap_count_spk_vs_lsn_Smiles_0_vs_Smiles_0 / count_smiles * 100        
            percentage_lsn_vs_spk_Smiles_0_vs_Smiles_0 = overlap_count_lsn_vs_spk_Smiles_0_vs_Smiles_0 / count_smiles * 100
            percentage_lsn_vs_spk_Laughs_0_vs_Smiles_0 = overlap_count_lsn_vs_spk_Laughs_0_vs_Smiles_0 / count_smiles * 100
            percentage_spk_vs_lsn_Laughs_0_vs_Smiles_0 = overlap_count_spk_vs_lsn_Laughs_0_vs_Smiles_0 / count_smiles * 100
        if count_laughs != 0:  
            percentage_spk_vs_lsn_Laughs_0_vs_Laughs_0 = overlap_count_spk_vs_lsn_Laughs_0_vs_Laughs_0 / count_laughs * 100
            percentage_lsn_vs_spk_Laughs_0_vs_Laughs_0 = overlap_count_lsn_vs_spk_Laughs_0_vs_Laughs_0 / count_laughs * 100
            percentage_lsn_vs_spk_Smiles_0_vs_Laughs_0 = overlap_count_lsn_vs_spk_Smiles_0_vs_Laughs_0 / count_laughs * 100
            percentage_spk_vs_lsn_Smiles_0_vs_Laughs_0 = overlap_count_spk_vs_lsn_Smiles_0_vs_Laughs_0 / count_laughs * 100
            
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Percentage for A spk / B lsn - Smiles_0 vs Smiles_0': percentage_spk_vs_lsn_Smiles_0_vs_Smiles_0,
            'Overlap Percentage for A spk / B lsn - Laughs_0 vs Laughs_0': percentage_spk_vs_lsn_Laughs_0_vs_Laughs_0,
            'Overlap Percentage for A spk / B lsn - Smiles_0 vs Laughs_0': percentage_spk_vs_lsn_Smiles_0_vs_Laughs_0,
            'Overlap Percentage for A spk / B lsn - Laughs_0 vs Smiles_0': percentage_spk_vs_lsn_Laughs_0_vs_Smiles_0,
            'Overlap Percentage for A lsn / B spk - Smiles_0 vs Smiles_0': percentage_lsn_vs_spk_Smiles_0_vs_Smiles_0,
            'Overlap Percentage for A lsn / B spk - Laughs_0 vs Laughs_0': percentage_lsn_vs_spk_Laughs_0_vs_Laughs_0,
            'Overlap Percentage for A lsn / B spk - Smiles_0 vs Laughs_0': percentage_lsn_vs_spk_Smiles_0_vs_Laughs_0,
            'Overlap Percentage for A lsn / B spk - Laughs_0 vs Smiles_0': percentage_lsn_vs_spk_Laughs_0_vs_Smiles_0
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Percentage for A spk / B lsn - Smiles_0 vs Smiles_0,Overlap Percentage for A spk / B lsn - Laughs_0 vs Laughs_0,Overlap Percentage for A spk / B lsn - Smiles_0 vs Laughs_0,Overlap Percentage for A spk / B lsn - Laughs_0 vs Smiles_0,Overlap Percentage for A lsn / B spk - Smiles_0 vs Smiles_0,Overlap Percentage for A lsn / B spk - Laughs_0 vs Laughs_0,Overlap Percentage for A lsn / B spk - Smiles_0 vs Laughs_0,Overlap Percentage for A lsn / B spk - Laughs_0 vs Smiles_0
0,CCDB,P12_P2_1402_dizzy.eaf_&_P12_P2_1402_monk.eaf,1.960784,0.0,0.0,0.0,64.705882,25.0,50.0,1.960784
1,CCDB,P12_P3_2202_dizzy.eaf_&_P12_P3_2202_monk.eaf,0.0,0.0,0.0,0.0,9.090909,0.0,28.571429,0.0
2,CCDB,P14_P7_2502_dizzy.eaf_&_P14_P7_2502_monk.eaf,5.263158,0.0,0.0,0.0,21.052632,0.0,0.0,0.0
3,CCDB,P15_P13_2402_dizzy.eaf_&_P15_P13_2402_monk.eaf,15.789474,0.0,0.0,0.0,10.526316,0.0,0.0,0.0
4,CCDB,P16_P6_0903_dizzy.eaf_&_P16_P6_0903_monk.eaf,4.761905,0.0,0.0,0.0,57.142857,0.0,0.0,0.0
5,CCDB,P18_P10_2102_dizzy.eaf_&_P18_P10_2102_monk.eaf,26.086957,0.0,100.0,0.0,52.173913,0.0,0.0,0.0
6,CCDB,P18_P1_2102_dizzy.eaf_&_P18_P1_2102_monk.eaf,26.666667,0.0,100.0,0.0,36.666667,0.0,0.0,0.0






### IFADV

Unnamed: 0,Database,Pair,Overlap Percentage for A spk / B lsn - Smiles_0 vs Smiles_0,Overlap Percentage for A spk / B lsn - Laughs_0 vs Laughs_0,Overlap Percentage for A spk / B lsn - Smiles_0 vs Laughs_0,Overlap Percentage for A spk / B lsn - Laughs_0 vs Smiles_0,Overlap Percentage for A lsn / B spk - Smiles_0 vs Smiles_0,Overlap Percentage for A lsn / B spk - Laughs_0 vs Laughs_0,Overlap Percentage for A lsn / B spk - Smiles_0 vs Laughs_0,Overlap Percentage for A lsn / B spk - Laughs_0 vs Smiles_0
0,IFADV,DVA1A.eaf_&_DVB1B.eaf,5.882353,0.0,0.0,0.0,11.764706,0.0,0.0,0.0
1,IFADV,DVA2C.eaf_&_DVB2D.eaf,4.651163,0.0,0.0,0.0,53.488372,0.0,66.666667,0.0
2,IFADV,DVA3E.eaf_&_DVB3F.eaf,4.255319,0.0,0.0,0.0,23.404255,100.0,0.0,4.255319
3,IFADV,DVA4C.eaf_&_DVB4G.eaf,4.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0
4,IFADV,DVA5G.eaf_&_DVB5H.eaf,5.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0
5,IFADV,DVA6H.eaf_&_DVB6I.eaf,4.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0
6,IFADV,DVA7B.eaf_&_DVB7J.eaf,8.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0
7,IFADV,DVA8K.eaf_&_DVB8L.eaf,4.0,0.0,0.0,4.0,0.0,20.0,0.0,6.0






### NDC

Unnamed: 0,Database,Pair,Overlap Percentage for A spk / B lsn - Smiles_0 vs Smiles_0,Overlap Percentage for A spk / B lsn - Laughs_0 vs Laughs_0,Overlap Percentage for A spk / B lsn - Smiles_0 vs Laughs_0,Overlap Percentage for A spk / B lsn - Laughs_0 vs Smiles_0,Overlap Percentage for A lsn / B spk - Smiles_0 vs Smiles_0,Overlap Percentage for A lsn / B spk - Laughs_0 vs Laughs_0,Overlap Percentage for A lsn / B spk - Smiles_0 vs Laughs_0,Overlap Percentage for A lsn / B spk - Laughs_0 vs Smiles_0
0,NDC,13_1_A_M.eaf_&_13_1_B_F.eaf,0.0,0.0,0.0,0.0,3.333333,0.0,0.0,0.0
1,NDC,13_2_A_M.eaf_&_13_2_B_F.eaf,4.347826,0.0,0.0,0.0,42.028986,0.0,100.0,0.0
2,NDC,13_4_A_M.eaf_&_13_4_B_F.eaf,4.132231,0.0,0.0,0.0,36.363636,42.857143,35.714286,0.0
3,NDC,14_1_A_M.eaf_&_14_1_B_F.eaf,22.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0
4,NDC,14_2_A_M.eaf_&_14_2_B_F.eaf,12.0,0.0,16.666667,0.0,3.0,0.0,0.0,0.0
5,NDC,17_1_A_F.eaf_&_17_1_B_F.eaf,50.0,50.0,0.0,7.142857,7.142857,50.0,0.0,7.142857
6,NDC,17_2_A_F.eaf_&_17_2_B_F.eaf,21.428571,0.0,25.0,0.0,71.428571,50.0,25.0,0.0
7,NDC,17_3_A_F.eaf_&_17_3_B_F.eaf,13.043478,0.0,0.0,4.347826,26.086957,50.0,25.0,4.347826
8,NDC,17_4_A_F.eaf_&_17_4_B_F.eaf,3.703704,25.0,0.0,0.0,74.074074,25.0,25.0,0.0
9,NDC,18_1_A_M.eaf_&_18_1_B_M.eaf,39.43662,25.0,25.0,1.408451,25.352113,0.0,25.0,0.0






### Overlap of person B compared to person A

Is there any overlap between smiles and laughs of person B being speaker to person A being listener? The other way ?

In [11]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_B}_&_{pair_file_A}"

            pair_dict = {}
            overlapping_data = {}

            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]

                overlapping_segments = get_overlapping_segments(lstB_tier[tier_B], lstA_tier[tier_A])
                overlapping_data[f"{tier_B} vs {tier_A}"] = {'Segments': overlapping_segments}

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict
dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_count_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_count_spk_vs_lsn = 0
        overlap_count_lsn_vs_spk = 0
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentB, segmentA in segments.items():
            for segA in segmentA:
                segment_key = f"{segA}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    
                    # Check if B is "spk" and A is "lsn"
                    if (segmentB[2].replace(" ", "") == "spk" and segA[2].replace(" ", "") == "lsn"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                overlap_count_spk_vs_lsn += 1
                    # Check if B is "lsn" and A is "spk"
                    elif (segmentB[2].replace(" ", "") == "lsn" and segA[2].replace(" ", "") == "spk"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                overlap_count_lsn_vs_spk += 1
                
                break
        overlap_count_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Count for B spk / A lsn - S&L': overlap_count_spk_vs_lsn,
            'Overlap Count for B lsn / A spk - S&L': overlap_count_lsn_vs_spk
        }) 
    df_overlap_count = pd.DataFrame(overlap_count_list)
    dataframes[database] = df_overlap_count

for database, df in dataframes.items():
    display(Markdown(f"**Database: {database}**"))
    display(df)

**Database: CCDB**

Unnamed: 0,Database,Pair,Overlap Count for B spk / A lsn - S&L,Overlap Count for B lsn / A spk - S&L
0,CCDB,P12_P2_1402_monk.eaf_&_P12_P2_1402_dizzy.eaf,1,21
1,CCDB,P12_P3_2202_monk.eaf_&_P12_P3_2202_dizzy.eaf,0,18
2,CCDB,P14_P7_2502_monk.eaf_&_P14_P7_2502_dizzy.eaf,8,8
3,CCDB,P15_P13_2402_monk.eaf_&_P15_P13_2402_dizzy.eaf,13,2
4,CCDB,P16_P6_0903_monk.eaf_&_P16_P6_0903_dizzy.eaf,0,6
5,CCDB,P18_P10_2102_monk.eaf_&_P18_P10_2102_dizzy.eaf,0,21
6,CCDB,P18_P1_2102_monk.eaf_&_P18_P1_2102_dizzy.eaf,5,13


**Database: IFADV**

Unnamed: 0,Database,Pair,Overlap Count for B spk / A lsn - S&L,Overlap Count for B lsn / A spk - S&L
0,IFADV,DVB1B.eaf_&_DVA1A.eaf,1,18
1,IFADV,DVB2D.eaf_&_DVA2C.eaf,1,26
2,IFADV,DVB3F.eaf_&_DVA3E.eaf,0,5
3,IFADV,DVB4G.eaf_&_DVA4C.eaf,13,10
4,IFADV,DVB5H.eaf_&_DVA5G.eaf,0,4
5,IFADV,DVB6I.eaf_&_DVA6H.eaf,3,6
6,IFADV,DVB7J.eaf_&_DVA7B.eaf,8,7
7,IFADV,DVB8L.eaf_&_DVA8K.eaf,0,11


**Database: NDC**

Unnamed: 0,Database,Pair,Overlap Count for B spk / A lsn - S&L,Overlap Count for B lsn / A spk - S&L
0,NDC,13_1_B_F.eaf_&_13_1_A_M.eaf,4,2
1,NDC,13_2_B_F.eaf_&_13_2_A_M.eaf,3,6
2,NDC,13_4_B_F.eaf_&_13_4_A_M.eaf,12,9
3,NDC,14_1_B_F.eaf_&_14_1_A_M.eaf,1,0
4,NDC,14_2_B_F.eaf_&_14_2_A_M.eaf,12,15
5,NDC,17_1_B_F.eaf_&_17_1_A_F.eaf,2,13
6,NDC,17_2_B_F.eaf_&_17_2_A_F.eaf,16,4
7,NDC,17_3_B_F.eaf_&_17_3_A_F.eaf,7,7
8,NDC,17_4_B_F.eaf_&_17_4_A_F.eaf,1,3
9,NDC,18_1_B_M.eaf_&_18_1_A_M.eaf,11,25


Percentage compared to the total number of smiles and laughs for each files:

In [12]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_B}_&_{pair_file_A}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstB_tier[tier_B], lstA_tier[tier_A])
                overlapping_data[f"{tier_B} vs {tier_A}"] = {'Segments': overlapping_segments}

                overlapping_data[f"{tier_A} count in lstA"] = 0

                tiers_in_lstA = set(lstA_tier[tier_A])
                overlapping_data[f"{tier_A} count in lstA"] = len(tiers_in_lstA)

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_count_spk_vs_lsn = 0
        overlap_count_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        count_smiles = pair_dict["Smiles_0 count in lstA"]
        count_laughs = pair_dict["Laughs_0 count in lstA"]
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentB, segmentA in segments.items():
            for segA in segmentA:
                segment_key = f"{segA}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    
                    # Check if B is "spk" and A is "lsn"
                    if (segmentB[2].replace(" ", "") == "spk" and segA[2].replace(" ", "") == "lsn"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                overlap_count_spk_vs_lsn += 1
                    # Check if B is "lsn" and A is "spk"
                    elif (segmentB[2].replace(" ", "") == "lsn" and segA[2].replace(" ", "") == "spk"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                overlap_count_lsn_vs_spk += 1
                
                break
        if count_smiles != 0 or count_laughs != 0:
            percentage_spk_vs_lsn = overlap_count_spk_vs_lsn / (count_smiles + count_laughs) * 100        
            percentage_lsn_vs_spk= overlap_count_lsn_vs_spk / (count_smiles + count_laughs) * 100
            
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Percentage for B spk / A lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Percentage for B lsn / A spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Percentage for B spk / A lsn - S&L,Overlap Percentage for B lsn / A spk - S&L
0,CCDB,P12_P2_1402_monk.eaf_&_P12_P2_1402_dizzy.eaf,2.272727,47.727273
1,CCDB,P12_P3_2202_monk.eaf_&_P12_P3_2202_dizzy.eaf,0.0,64.285714
2,CCDB,P14_P7_2502_monk.eaf_&_P14_P7_2502_dizzy.eaf,22.222222,22.222222
3,CCDB,P15_P13_2402_monk.eaf_&_P15_P13_2402_dizzy.eaf,17.808219,2.739726
4,CCDB,P16_P6_0903_monk.eaf_&_P16_P6_0903_dizzy.eaf,0.0,31.578947
5,CCDB,P18_P10_2102_monk.eaf_&_P18_P10_2102_dizzy.eaf,0.0,56.756757
6,CCDB,P18_P1_2102_monk.eaf_&_P18_P1_2102_dizzy.eaf,11.904762,30.952381






### IFADV

Unnamed: 0,Database,Pair,Overlap Percentage for B spk / A lsn - S&L,Overlap Percentage for B lsn / A spk - S&L
0,IFADV,DVB1B.eaf_&_DVA1A.eaf,4.0,72.0
1,IFADV,DVB2D.eaf_&_DVA2C.eaf,2.173913,56.521739
2,IFADV,DVB3F.eaf_&_DVA3E.eaf,0.0,45.454545
3,IFADV,DVB4G.eaf_&_DVA4C.eaf,48.148148,37.037037
4,IFADV,DVB5H.eaf_&_DVA5G.eaf,0.0,44.444444
5,IFADV,DVB6I.eaf_&_DVA6H.eaf,27.272727,54.545455
6,IFADV,DVB7J.eaf_&_DVA7B.eaf,25.0,21.875
7,IFADV,DVB8L.eaf_&_DVA8K.eaf,0.0,73.333333






### NDC

Unnamed: 0,Database,Pair,Overlap Percentage for B spk / A lsn - S&L,Overlap Percentage for B lsn / A spk - S&L
0,NDC,13_1_B_F.eaf_&_13_1_A_M.eaf,21.052632,10.526316
1,NDC,13_2_B_F.eaf_&_13_2_A_M.eaf,12.5,25.0
2,NDC,13_4_B_F.eaf_&_13_4_A_M.eaf,24.0,18.0
3,NDC,14_1_B_F.eaf_&_14_1_A_M.eaf,3.448276,0.0
4,NDC,14_2_B_F.eaf_&_14_2_A_M.eaf,30.0,37.5
5,NDC,17_1_B_F.eaf_&_17_1_A_F.eaf,10.526316,68.421053
6,NDC,17_2_B_F.eaf_&_17_2_A_F.eaf,66.666667,16.666667
7,NDC,17_3_B_F.eaf_&_17_3_A_F.eaf,31.818182,31.818182
8,NDC,17_4_B_F.eaf_&_17_4_A_F.eaf,2.12766,6.382979
9,NDC,18_1_B_M.eaf_&_18_1_A_M.eaf,15.068493,34.246575






Let's consider now that we want to see the number of overlap between smiles and smiles, laughs and laughs, smiles and laughs, laughs and smiles.

In [13]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_B}_&_{pair_file_A}"

            pair_dict = {}
            overlapping_data = {}

            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]

                overlapping_segments = get_overlapping_segments(lstB_tier[tier_B], lstA_tier[tier_A])
                overlapping_data[f"{tier_B} vs {tier_A}"] = {'Segments': overlapping_segments}

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict
dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_count_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_count_spk_vs_lsn_Smiles_0_vs_Smiles_0 = 0
        overlap_count_spk_vs_lsn_Laughs_0_vs_Laughs_0 = 0
        overlap_count_lsn_vs_spk_Smiles_0_vs_Smiles_0 = 0
        overlap_count_lsn_vs_spk_Laughs_0_vs_Laughs_0 = 0
        overlap_count_lsn_vs_spk_Smiles_0_vs_Laughs_0 = 0
        overlap_count_spk_vs_lsn_Smiles_0_vs_Laughs_0 = 0
        overlap_count_lsn_vs_spk_Laughs_0_vs_Smiles_0 = 0
        overlap_count_spk_vs_lsn_Laughs_0_vs_Smiles_0 = 0
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentB, segmentA in segments.items():
            for segA in segmentA:
                segment_key = f"{segA}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    
                    # Check if B is "spk" and A is "lsn"
                    if (segmentB[2].replace(" ", "") == "spk" and segA[2].replace(" ", "") == "lsn"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                globals()[f"overlap_count_spk_vs_lsn_{tierA}_vs_{tierB}"] += 1
                    # Check if B is "lsn" and A is "spk"
                    elif (segmentB[2].replace(" ", "") == "lsn" and segA[2].replace(" ", "") == "spk"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                globals()[f"overlap_count_lsn_vs_spk_{tierA}_vs_{tierB}"] += 1
                
                break 
        overlap_count_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Count for B spk / A lsn - Smiles_0 vs Smiles_0': overlap_count_spk_vs_lsn_Smiles_0_vs_Smiles_0,
            'Overlap Count for B spk / A lsn - Laughs_0 vs Laughs_0': overlap_count_spk_vs_lsn_Laughs_0_vs_Laughs_0,
            'Overlap Count for B spk / A lsn - Smiles_0 vs Laughs_0': overlap_count_spk_vs_lsn_Smiles_0_vs_Laughs_0,
            'Overlap Count for B spk / A lsn - Laughs_0 vs Smiles_0': overlap_count_spk_vs_lsn_Laughs_0_vs_Smiles_0,
            'Overlap Count for B lsn / A spk - Smiles_0 vs Smiles_0': overlap_count_lsn_vs_spk_Smiles_0_vs_Smiles_0,
            'Overlap Count for B lsn / A spk - Laughs_0 vs Laughs_0': overlap_count_lsn_vs_spk_Laughs_0_vs_Laughs_0,
            'Overlap Count for B lsn / A spk - Smiles_0 vs Laughs_0': overlap_count_lsn_vs_spk_Smiles_0_vs_Laughs_0,
            'Overlap Count for B lsn / A spk - Laughs_0 vs Smiles_0': overlap_count_lsn_vs_spk_Laughs_0_vs_Smiles_0
        })
    df_overlap_count = pd.DataFrame(overlap_count_list)
    dataframes[database] = df_overlap_count

for database, df in dataframes.items():
    display(Markdown(f"**Database: {database}**"))
    display(df)

**Database: CCDB**

Unnamed: 0,Database,Pair,Overlap Count for B spk / A lsn - Smiles_0 vs Smiles_0,Overlap Count for B spk / A lsn - Laughs_0 vs Laughs_0,Overlap Count for B spk / A lsn - Smiles_0 vs Laughs_0,Overlap Count for B spk / A lsn - Laughs_0 vs Smiles_0,Overlap Count for B lsn / A spk - Smiles_0 vs Smiles_0,Overlap Count for B lsn / A spk - Laughs_0 vs Laughs_0,Overlap Count for B lsn / A spk - Smiles_0 vs Laughs_0,Overlap Count for B lsn / A spk - Laughs_0 vs Smiles_0
0,CCDB,P12_P2_1402_monk.eaf_&_P12_P2_1402_dizzy.eaf,1,0,0,0,19,1,1,0
1,CCDB,P12_P3_2202_monk.eaf_&_P12_P3_2202_dizzy.eaf,0,0,0,0,18,0,0,0
2,CCDB,P14_P7_2502_monk.eaf_&_P14_P7_2502_dizzy.eaf,7,0,0,1,8,0,0,0
3,CCDB,P15_P13_2402_monk.eaf_&_P15_P13_2402_dizzy.eaf,13,0,0,0,2,0,0,0
4,CCDB,P16_P6_0903_monk.eaf_&_P16_P6_0903_dizzy.eaf,0,0,0,0,6,0,0,0
5,CCDB,P18_P10_2102_monk.eaf_&_P18_P10_2102_dizzy.eaf,0,0,0,0,21,0,0,0
6,CCDB,P18_P1_2102_monk.eaf_&_P18_P1_2102_dizzy.eaf,5,0,0,0,13,0,0,0


**Database: IFADV**

Unnamed: 0,Database,Pair,Overlap Count for B spk / A lsn - Smiles_0 vs Smiles_0,Overlap Count for B spk / A lsn - Laughs_0 vs Laughs_0,Overlap Count for B spk / A lsn - Smiles_0 vs Laughs_0,Overlap Count for B spk / A lsn - Laughs_0 vs Smiles_0,Overlap Count for B lsn / A spk - Smiles_0 vs Smiles_0,Overlap Count for B lsn / A spk - Laughs_0 vs Laughs_0,Overlap Count for B lsn / A spk - Smiles_0 vs Laughs_0,Overlap Count for B lsn / A spk - Laughs_0 vs Smiles_0
0,IFADV,DVB1B.eaf_&_DVA1A.eaf,1,0,0,0,18,0,0,0
1,IFADV,DVB2D.eaf_&_DVA2C.eaf,1,0,0,0,26,0,0,0
2,IFADV,DVB3F.eaf_&_DVA3E.eaf,0,0,0,0,2,2,1,0
3,IFADV,DVB4G.eaf_&_DVA4C.eaf,11,0,0,2,10,0,0,0
4,IFADV,DVB5H.eaf_&_DVA5G.eaf,0,0,0,0,4,0,0,0
5,IFADV,DVB6I.eaf_&_DVA6H.eaf,3,0,0,0,6,0,0,0
6,IFADV,DVB7J.eaf_&_DVA7B.eaf,7,0,0,1,6,0,0,1
7,IFADV,DVB8L.eaf_&_DVA8K.eaf,0,0,0,0,8,2,0,1


**Database: NDC**

Unnamed: 0,Database,Pair,Overlap Count for B spk / A lsn - Smiles_0 vs Smiles_0,Overlap Count for B spk / A lsn - Laughs_0 vs Laughs_0,Overlap Count for B spk / A lsn - Smiles_0 vs Laughs_0,Overlap Count for B spk / A lsn - Laughs_0 vs Smiles_0,Overlap Count for B lsn / A spk - Smiles_0 vs Smiles_0,Overlap Count for B lsn / A spk - Laughs_0 vs Laughs_0,Overlap Count for B lsn / A spk - Smiles_0 vs Laughs_0,Overlap Count for B lsn / A spk - Laughs_0 vs Smiles_0
0,NDC,13_1_B_F.eaf_&_13_1_A_M.eaf,2,1,1,0,2,0,0,0
1,NDC,13_2_B_F.eaf_&_13_2_A_M.eaf,3,0,0,0,6,0,0,0
2,NDC,13_4_B_F.eaf_&_13_4_A_M.eaf,9,2,1,0,8,1,0,0
3,NDC,14_1_B_F.eaf_&_14_1_A_M.eaf,1,0,0,0,0,0,0,0
4,NDC,14_2_B_F.eaf_&_14_2_A_M.eaf,11,1,0,0,14,0,0,1
5,NDC,17_1_B_F.eaf_&_17_1_A_F.eaf,1,1,0,0,10,1,0,2
6,NDC,17_2_B_F.eaf_&_17_2_A_F.eaf,14,2,0,0,4,0,0,0
7,NDC,17_3_B_F.eaf_&_17_3_A_F.eaf,6,1,0,0,6,1,0,0
8,NDC,17_4_B_F.eaf_&_17_4_A_F.eaf,1,0,0,0,2,0,0,1
9,NDC,18_1_B_M.eaf_&_18_1_A_M.eaf,9,0,0,2,21,0,1,3


Let's see now for the total number of overlap between the pairs files and the percentage of overlap.

In [14]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_B}_&_{pair_file_A}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstB_tier[tier_B], lstA_tier[tier_A])
                overlapping_data[f"{tier_B} vs {tier_A}"] = {'Segments': overlapping_segments}

                overlapping_data[f"{tier_A} count in lstA"] = 0

                tiers_in_lstA = set(lstA_tier[tier_A])
                overlapping_data[f"{tier_A} count in lstA"] = len(tiers_in_lstA)

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_count_spk_vs_lsn_Smiles_0_vs_Smiles_0 = 0
        overlap_count_spk_vs_lsn_Laughs_0_vs_Laughs_0 = 0
        overlap_count_lsn_vs_spk_Smiles_0_vs_Smiles_0 = 0
        overlap_count_lsn_vs_spk_Laughs_0_vs_Laughs_0 = 0
        overlap_count_lsn_vs_spk_Smiles_0_vs_Laughs_0 = 0
        overlap_count_spk_vs_lsn_Smiles_0_vs_Laughs_0 = 0
        overlap_count_lsn_vs_spk_Laughs_0_vs_Smiles_0 = 0
        overlap_count_spk_vs_lsn_Laughs_0_vs_Smiles_0 = 0
        percentage_spk_vs_lsn_Smiles_0_vs_Smiles_0 = 0
        percentage_spk_vs_lsn_Laughs_0_vs_Laughs_0 = 0
        percentage_lsn_vs_spk_Smiles_0_vs_Smiles_0 = 0
        percentage_lsn_vs_spk_Laughs_0_vs_Laughs_0 = 0
        percentage_lsn_vs_spk_Smiles_0_vs_Laughs_0 = 0
        percentage_spk_vs_lsn_Smiles_0_vs_Laughs_0 = 0
        percentage_lsn_vs_spk_Laughs_0_vs_Smiles_0 = 0
        percentage_spk_vs_lsn_Laughs_0_vs_Smiles_0 = 0
        count_smiles = pair_dict["Smiles_0 count in lstA"]
        count_laughs = pair_dict["Laughs_0 count in lstA"]
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentB, segmentA in segments.items():
            for segA in segmentA:
                segment_key = f"{segA}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    
                    # Check if B is "spk" and A is "lsn"
                    if (segmentB[2].replace(" ", "") == "spk" and segA[2].replace(" ", "") == "lsn"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                globals()[f"overlap_count_spk_vs_lsn_{tierB}_vs_{tierA}"] += 1
                    # Check if B is "lsn" and A is "spk"
                    elif (segmentB[2].replace(" ", "") == "lsn" and segA[2].replace(" ", "") == "spk"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                globals()[f"overlap_count_lsn_vs_spk_{tierB}_vs_{tierA}"] += 1
                
                break
        if count_smiles != 0:
            percentage_spk_vs_lsn_Smiles_0_vs_Smiles_0 = overlap_count_spk_vs_lsn_Smiles_0_vs_Smiles_0 / count_smiles * 100        
            percentage_lsn_vs_spk_Smiles_0_vs_Smiles_0 = overlap_count_lsn_vs_spk_Smiles_0_vs_Smiles_0 / count_smiles * 100
            percentage_lsn_vs_spk_Laughs_0_vs_Smiles_0 = overlap_count_lsn_vs_spk_Laughs_0_vs_Smiles_0 / count_smiles * 100
            percentage_spk_vs_lsn_Laughs_0_vs_Smiles_0 = overlap_count_spk_vs_lsn_Laughs_0_vs_Smiles_0 / count_smiles * 100
        if count_laughs != 0:  
            percentage_spk_vs_lsn_Laughs_0_vs_Laughs_0 = overlap_count_spk_vs_lsn_Laughs_0_vs_Laughs_0 / count_laughs * 100
            percentage_lsn_vs_spk_Laughs_0_vs_Laughs_0 = overlap_count_lsn_vs_spk_Laughs_0_vs_Laughs_0 / count_laughs * 100
            percentage_lsn_vs_spk_Smiles_0_vs_Laughs_0 = overlap_count_lsn_vs_spk_Smiles_0_vs_Laughs_0 / count_laughs * 100
            percentage_spk_vs_lsn_Smiles_0_vs_Laughs_0 = overlap_count_spk_vs_lsn_Smiles_0_vs_Laughs_0 / count_laughs * 100
            
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Percentage for A spk / B lsn - Smiles_0 vs Smiles_0': percentage_spk_vs_lsn_Smiles_0_vs_Smiles_0,
            'Overlap Percentage for A spk / B lsn - Laughs_0 vs Laughs_0': percentage_spk_vs_lsn_Laughs_0_vs_Laughs_0,
            'Overlap Percentage for A spk / B lsn - Smiles_0 vs Laughs_0': percentage_spk_vs_lsn_Smiles_0_vs_Laughs_0,
            'Overlap Percentage for A spk / B lsn - Laughs_0 vs Smiles_0': percentage_spk_vs_lsn_Laughs_0_vs_Smiles_0,
            'Overlap Percentage for A lsn / B spk - Smiles_0 vs Smiles_0': percentage_lsn_vs_spk_Smiles_0_vs_Smiles_0,
            'Overlap Percentage for A lsn / B spk - Laughs_0 vs Laughs_0': percentage_lsn_vs_spk_Laughs_0_vs_Laughs_0,
            'Overlap Percentage for A lsn / B spk - Smiles_0 vs Laughs_0': percentage_lsn_vs_spk_Smiles_0_vs_Laughs_0,
            'Overlap Percentage for A lsn / B spk - Laughs_0 vs Smiles_0': percentage_lsn_vs_spk_Laughs_0_vs_Smiles_0
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Percentage for A spk / B lsn - Smiles_0 vs Smiles_0,Overlap Percentage for A spk / B lsn - Laughs_0 vs Laughs_0,Overlap Percentage for A spk / B lsn - Smiles_0 vs Laughs_0,Overlap Percentage for A spk / B lsn - Laughs_0 vs Smiles_0,Overlap Percentage for A lsn / B spk - Smiles_0 vs Smiles_0,Overlap Percentage for A lsn / B spk - Laughs_0 vs Laughs_0,Overlap Percentage for A lsn / B spk - Smiles_0 vs Laughs_0,Overlap Percentage for A lsn / B spk - Laughs_0 vs Smiles_0
0,CCDB,P12_P2_1402_monk.eaf_&_P12_P2_1402_dizzy.eaf,2.325581,0.0,0.0,0.0,44.186047,100.0,0.0,2.325581
1,CCDB,P12_P3_2202_monk.eaf_&_P12_P3_2202_dizzy.eaf,0.0,0.0,0.0,0.0,64.285714,0.0,0.0,0.0
2,CCDB,P14_P7_2502_monk.eaf_&_P14_P7_2502_dizzy.eaf,26.923077,0.0,10.0,0.0,30.769231,0.0,0.0,0.0
3,CCDB,P15_P13_2402_monk.eaf_&_P15_P13_2402_dizzy.eaf,20.634921,0.0,0.0,0.0,3.174603,0.0,0.0,0.0
4,CCDB,P16_P6_0903_monk.eaf_&_P16_P6_0903_dizzy.eaf,0.0,0.0,0.0,0.0,31.578947,0.0,0.0,0.0
5,CCDB,P18_P10_2102_monk.eaf_&_P18_P10_2102_dizzy.eaf,0.0,0.0,0.0,0.0,56.756757,0.0,0.0,0.0
6,CCDB,P18_P1_2102_monk.eaf_&_P18_P1_2102_dizzy.eaf,11.904762,0.0,0.0,0.0,30.952381,0.0,0.0,0.0






### IFADV

Unnamed: 0,Database,Pair,Overlap Percentage for A spk / B lsn - Smiles_0 vs Smiles_0,Overlap Percentage for A spk / B lsn - Laughs_0 vs Laughs_0,Overlap Percentage for A spk / B lsn - Smiles_0 vs Laughs_0,Overlap Percentage for A spk / B lsn - Laughs_0 vs Smiles_0,Overlap Percentage for A lsn / B spk - Smiles_0 vs Smiles_0,Overlap Percentage for A lsn / B spk - Laughs_0 vs Laughs_0,Overlap Percentage for A lsn / B spk - Smiles_0 vs Laughs_0,Overlap Percentage for A lsn / B spk - Laughs_0 vs Smiles_0
0,IFADV,DVB1B.eaf_&_DVA1A.eaf,4.0,0.0,0.0,0.0,72.0,0.0,0.0,0.0
1,IFADV,DVB2D.eaf_&_DVA2C.eaf,2.222222,0.0,0.0,0.0,57.777778,0.0,0.0,0.0
2,IFADV,DVB3F.eaf_&_DVA3E.eaf,0.0,0.0,0.0,0.0,28.571429,50.0,0.0,14.285714
3,IFADV,DVB4G.eaf_&_DVA4C.eaf,44.0,0.0,100.0,0.0,40.0,0.0,0.0,0.0
4,IFADV,DVB5H.eaf_&_DVA5G.eaf,0.0,0.0,0.0,0.0,44.444444,0.0,0.0,0.0
5,IFADV,DVB6I.eaf_&_DVA6H.eaf,27.272727,0.0,0.0,0.0,54.545455,0.0,0.0,0.0
6,IFADV,DVB7J.eaf_&_DVA7B.eaf,25.0,0.0,25.0,0.0,21.428571,0.0,25.0,0.0
7,IFADV,DVB8L.eaf_&_DVA8K.eaf,0.0,0.0,0.0,0.0,80.0,40.0,20.0,0.0






### NDC

Unnamed: 0,Database,Pair,Overlap Percentage for A spk / B lsn - Smiles_0 vs Smiles_0,Overlap Percentage for A spk / B lsn - Laughs_0 vs Laughs_0,Overlap Percentage for A spk / B lsn - Smiles_0 vs Laughs_0,Overlap Percentage for A spk / B lsn - Laughs_0 vs Smiles_0,Overlap Percentage for A lsn / B spk - Smiles_0 vs Smiles_0,Overlap Percentage for A lsn / B spk - Laughs_0 vs Laughs_0,Overlap Percentage for A lsn / B spk - Smiles_0 vs Laughs_0,Overlap Percentage for A lsn / B spk - Laughs_0 vs Smiles_0
0,NDC,13_1_B_F.eaf_&_13_1_A_M.eaf,12.5,33.333333,0.0,6.25,12.5,0.0,0.0,0.0
1,NDC,13_2_B_F.eaf_&_13_2_A_M.eaf,13.043478,0.0,0.0,0.0,26.086957,0.0,0.0,0.0
2,NDC,13_4_B_F.eaf_&_13_4_A_M.eaf,20.930233,28.571429,0.0,2.325581,18.604651,14.285714,0.0,0.0
3,NDC,14_1_B_F.eaf_&_14_1_A_M.eaf,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,NDC,14_2_B_F.eaf_&_14_2_A_M.eaf,28.947368,50.0,0.0,0.0,36.842105,0.0,50.0,0.0
5,NDC,17_1_B_F.eaf_&_17_1_A_F.eaf,6.666667,25.0,0.0,0.0,66.666667,25.0,50.0,0.0
6,NDC,17_2_B_F.eaf_&_17_2_A_F.eaf,63.636364,100.0,0.0,0.0,18.181818,0.0,0.0,0.0
7,NDC,17_3_B_F.eaf_&_17_3_A_F.eaf,35.294118,20.0,0.0,0.0,35.294118,20.0,0.0,0.0
8,NDC,17_4_B_F.eaf_&_17_4_A_F.eaf,2.272727,0.0,0.0,0.0,4.545455,0.0,33.333333,0.0
9,NDC,18_1_B_M.eaf_&_18_1_A_M.eaf,13.636364,0.0,28.571429,0.0,31.818182,0.0,42.857143,1.515152






## Overlapping thanks to duration

First, we study the overlap between the person A to person B.

-> The total duration correspond to the duration of the sequences of the tier concerned during all the video/file.

### Total duration come from the person B

For each pair of file, we compute the percentage of overlap for S&L between the two person in the interaction regardless the entity of the tier studied. 

In [15]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstA_tier[tier_A], lstB_tier[tier_B])
                overlapping_data[f"{tier_A} vs {tier_B}"] = {'Segments': overlapping_segments}

                overlapping_data[f"{tier_B} duration in lstB"] = 0

                tiers_in_lstB = set(lstB_tier[tier_B])
                for seg in tiers_in_lstB:
                    overlapping_data[f"{tier_B} duration in lstB"] += seg[1] - seg[0]

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_duration_spk_vs_lsn = 0
        overlap_duration_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        duration_smiles = pair_dict["Smiles_0 duration in lstB"]
        duration_laughs = pair_dict["Laughs_0 duration in lstB"]
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentA, segmentB in segments.items():
            for segB in segmentB:
                segment_key = f"{segB}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    # Check if A is "spk" and B is "lsn"
                    if (segmentA[2].replace(" ", "") == "spk" and segB[2].replace(" ", "") == "lsn"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if b[0] > A[0] and b[1] < A[1]:
                                                    overlap_duration_spk_vs_lsn += b[1] - b[0]
                                                elif b[0] < A[0] and b[1] > A[1]:
                                                    overlap_duration_spk_vs_lsn += A[1] - A[0]
                                                elif b[0] < A[0] and b[1] < A[1]:
                                                    overlap_duration_spk_vs_lsn += b[1] - A[0]
                                                elif b[0] > A[0] and b[1] > A[1]:
                                                    overlap_duration_spk_vs_lsn += A[1] - b[0]
                    # Check if A is "lsn" and B is "spk"
                    elif (segmentA[2].replace(" ", "") == "lsn" and segB[2].replace(" ", "") == "spk"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if b[0] > A[0] and b[1] < A[1]:
                                                    overlap_duration_lsn_vs_spk += b[1] - b[0]
                                                elif b[0] < A[0] and b[1] > A[1]:
                                                    overlap_duration_lsn_vs_spk += A[1] - A[0]
                                                elif b[0] < A[0] and b[1] < A[1]:
                                                    overlap_duration_lsn_vs_spk += b[1] - A[0]
                                                elif b[0] > A[0] and b[1] > A[1]:
                                                    overlap_duration_lsn_vs_spk += A[1] - b[0]         
                break
        if duration_smiles != 0 or duration_laughs !=0 :
            percentage_spk_vs_lsn = overlap_duration_spk_vs_lsn / (duration_smiles + duration_laughs) * 100
            percentage_lsn_vs_spk = overlap_duration_lsn_vs_spk / (duration_smiles + duration_laughs) * 100
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Duration for A spk / B lsn - S&L': overlap_duration_spk_vs_lsn,
            'Overlap Percentage for A spk / B lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Duration for A lsn / B spk - S&L': overlap_duration_lsn_vs_spk,
            'Overlap Percentage for A lsn / B spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,CCDB,P12_P2_1402_dizzy.eaf_&_P12_P2_1402_monk.eaf,1835,1.502313,46640,38.184125
1,CCDB,P12_P3_2202_dizzy.eaf_&_P12_P3_2202_monk.eaf,0,0.0,6550,6.020608
2,CCDB,P14_P7_2502_dizzy.eaf_&_P14_P7_2502_monk.eaf,255,0.386833,4790,7.266383
3,CCDB,P15_P13_2402_dizzy.eaf_&_P15_P13_2402_monk.eaf,1980,3.29956,1700,2.832956
4,CCDB,P16_P6_0903_dizzy.eaf_&_P16_P6_0903_monk.eaf,540,0.989283,12790,23.431346
5,CCDB,P18_P10_2102_dizzy.eaf_&_P18_P10_2102_monk.eaf,5790,17.75529,13740,42.134315
6,CCDB,P18_P1_2102_dizzy.eaf_&_P18_P1_2102_monk.eaf,4915,9.634421,12110,23.738116






### IFADV

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,IFADV,DVA1A.eaf_&_DVB1B.eaf,500,0.496081,3940,3.909118
1,IFADV,DVA2C.eaf_&_DVB2D.eaf,2480,2.526359,32568,33.176794
2,IFADV,DVA3E.eaf_&_DVB3F.eaf,1140,1.496947,19830,26.038999
3,IFADV,DVA4C.eaf_&_DVB4G.eaf,1190,1.614654,6790,9.213026
4,IFADV,DVA5G.eaf_&_DVB5H.eaf,1980,3.523759,5700,10.144154
5,IFADV,DVA6H.eaf_&_DVB6I.eaf,1090,2.289195,3570,7.497637
6,IFADV,DVA7B.eaf_&_DVB7J.eaf,4925,4.105022,13740,11.452386
7,IFADV,DVA8K.eaf_&_DVB8L.eaf,980,1.171128,8890,10.623805






### NDC

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,NDC,13_1_A_M.eaf_&_13_1_B_F.eaf,0,0.0,1860,1.672722
1,NDC,13_2_A_M.eaf_&_13_2_B_F.eaf,6285,6.365459,22129,22.412291
2,NDC,13_4_A_M.eaf_&_13_4_B_F.eaf,7330,3.313668,38039,17.196266
3,NDC,14_1_A_M.eaf_&_14_1_B_F.eaf,8030,10.09428,1170,1.470773
4,NDC,14_2_A_M.eaf_&_14_2_B_F.eaf,9475,6.857743,2300,1.664676
5,NDC,17_1_A_F.eaf_&_17_1_B_F.eaf,7428,16.308787,1705,3.743468
6,NDC,17_2_A_F.eaf_&_17_2_B_F.eaf,2670,2.507042,36190,33.981221
7,NDC,17_3_A_F.eaf_&_17_3_B_F.eaf,2638,3.36733,9820,12.534943
8,NDC,17_4_A_F.eaf_&_17_4_B_F.eaf,3490,1.902478,61760,33.666767
9,NDC,18_1_A_M.eaf_&_18_1_B_M.eaf,20181,18.811697,15110,14.08477






### Total duration come from the person A

In [16]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstA_tier[tier_A], lstB_tier[tier_B])
                overlapping_data[f"{tier_A} vs {tier_B}"] = {'Segments': overlapping_segments}

                overlapping_data[f"{tier_A} duration in lstA"] = 0

                tiers_in_lstA = set(lstA_tier[tier_A])
                for seg in tiers_in_lstA:
                    overlapping_data[f"{tier_A} duration in lstA"] += seg[1] - seg[0]

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_duration_spk_vs_lsn = 0
        overlap_duration_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        duration_smiles = pair_dict["Smiles_0 duration in lstA"]
        duration_laughs = pair_dict["Laughs_0 duration in lstA"]
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentA, segmentB in segments.items():
            for segB in segmentB:
                segment_key = f"{segB}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    # Check if A is "spk" and B is "lsn"
                    if (segmentA[2].replace(" ", "") == "spk" and segB[2].replace(" ", "") == "lsn"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if b[0] > A[0] and b[1] < A[1]:
                                                    overlap_duration_spk_vs_lsn += b[1] - b[0]
                                                elif b[0] < A[0] and b[1] > A[1]:
                                                    overlap_duration_spk_vs_lsn += A[1] - A[0]
                                                elif b[0] < A[0] and b[1] < A[1]:
                                                    overlap_duration_spk_vs_lsn += b[1] - A[0]
                                                elif b[0] > A[0] and b[1] > A[1]:
                                                    overlap_duration_spk_vs_lsn += A[1] - b[0]
                    # Check if A is "lsn" and B is "spk"
                    elif (segmentA[2].replace(" ", "") == "lsn" and segB[2].replace(" ", "") == "spk"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if b[0] > A[0] and b[1] < A[1]:
                                                    overlap_duration_lsn_vs_spk += b[1] - b[0]
                                                elif b[0] < A[0] and b[1] > A[1]:
                                                    overlap_duration_lsn_vs_spk += A[1] - A[0]
                                                elif b[0] < A[0] and b[1] < A[1]:
                                                    overlap_duration_lsn_vs_spk += b[1] - A[0]
                                                elif b[0] > A[0] and b[1] > A[1]:
                                                    overlap_duration_lsn_vs_spk += A[1] - b[0]         
                break
        if duration_smiles != 0 or duration_laughs !=0 :
            percentage_spk_vs_lsn = overlap_duration_spk_vs_lsn / (duration_smiles + duration_laughs) * 100
            percentage_lsn_vs_spk = overlap_duration_lsn_vs_spk / (duration_smiles + duration_laughs) * 100
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Duration for A spk / B lsn - S&L': overlap_duration_spk_vs_lsn,
            'Overlap Percentage for A spk / B lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Duration for A lsn / B spk - S&L': overlap_duration_lsn_vs_spk,
            'Overlap Percentage for A lsn / B spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,CCDB,P12_P2_1402_dizzy.eaf_&_P12_P2_1402_monk.eaf,1835,1.702068,46640,43.261293
1,CCDB,P12_P3_2202_dizzy.eaf_&_P12_P3_2202_monk.eaf,0,0.0,6550,11.279879
2,CCDB,P14_P7_2502_dizzy.eaf_&_P14_P7_2502_monk.eaf,255,0.30108,4790,5.655588
3,CCDB,P15_P13_2402_dizzy.eaf_&_P15_P13_2402_monk.eaf,1980,2.797598,1700,2.401978
4,CCDB,P16_P6_0903_dizzy.eaf_&_P16_P6_0903_monk.eaf,540,1.541976,12790,36.521987
5,CCDB,P18_P10_2102_dizzy.eaf_&_P18_P10_2102_monk.eaf,5790,9.934798,13740,23.575841
6,CCDB,P18_P1_2102_dizzy.eaf_&_P18_P1_2102_monk.eaf,4915,8.077239,12110,19.901397






### IFADV

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,IFADV,DVA1A.eaf_&_DVB1B.eaf,500,0.883548,3940,6.962361
1,IFADV,DVA2C.eaf_&_DVB2D.eaf,2480,2.610939,32568,34.287519
2,IFADV,DVA3E.eaf_&_DVB3F.eaf,1140,4.014085,19830,69.823944
3,IFADV,DVA4C.eaf_&_DVB4G.eaf,1190,2.203296,6790,12.571746
4,IFADV,DVA5G.eaf_&_DVB5H.eaf,1980,8.48329,5700,24.421594
5,IFADV,DVA6H.eaf_&_DVB6I.eaf,1090,4.182655,3570,13.699156
6,IFADV,DVA7B.eaf_&_DVB7J.eaf,4925,8.393694,13740,23.417128
7,IFADV,DVA8K.eaf_&_DVB8L.eaf,980,4.402516,8890,39.937107






### NDC

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,NDC,13_1_A_M.eaf_&_13_1_B_F.eaf,0,0.0,1860,7.005122
1,NDC,13_2_A_M.eaf_&_13_2_B_F.eaf,6285,13.436378,22129,47.308449
2,NDC,13_4_A_M.eaf_&_13_4_B_F.eaf,7330,9.14501,38039,47.457987
3,NDC,14_1_A_M.eaf_&_14_1_B_F.eaf,8030,22.8125,1170,3.323864
4,NDC,14_2_A_M.eaf_&_14_2_B_F.eaf,9475,17.153357,2300,4.163876
5,NDC,17_1_A_F.eaf_&_17_1_B_F.eaf,7428,20.012393,1705,4.593582
6,NDC,17_2_A_F.eaf_&_17_2_B_F.eaf,2670,2.956123,36190,40.068201
7,NDC,17_3_A_F.eaf_&_17_3_B_F.eaf,2638,8.396461,9820,31.255968
8,NDC,17_4_A_F.eaf_&_17_4_B_F.eaf,3490,2.739038,61760,48.470769
9,NDC,18_1_A_M.eaf_&_18_1_B_M.eaf,20181,20.560757,15110,15.394333






### Total duration is the union of the duration of person A and person B

In [17]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstA_tier[tier_A], lstB_tier[tier_B])
                overlapping_data[f"{tier_A} vs {tier_B}"] = {'Segments': overlapping_segments}

                overlapping_data[f"Total duration"] = 0
                tiers_in_lstA = set(lstA_tier[tier_A])
                tiers_in_lstB = set(lstB_tier[tier_B])
                for segA, segB in zip(tiers_in_lstA, tiers_in_lstB):
                    if segA[0] < segB[0] and segA[1] > segB[1]:
                        overlapping_data[f"Total duration"] += segA[1] - segA[0]
                    elif segB[0] < segA[1] and segB[1] > segA[0]:
                        overlapping_data[f"Total duration"] += segB[1] - segB[0]
                    elif segA[0] > segB[1] or segA[1] < segB[0]:
                        overlapping_data[f"Total duration"] += segA[1] - segA[0]
                    elif segB[0] > segA[1] or segB[1] < segA[0]:
                        overlapping_data[f"Total duration"] += segB[1] - segB[0]
                    elif segA[0] < segB[0] and segA[1] < segB[1]:
                        overlapping_data[f"Total duration"] += (segB[1] - segA[0]) - (segA[1] - segB[0])
                    elif segB[0] < segA[0] and segB[1] < segA[1]:
                        overlapping_data[f"Total duration"] += (segA[1] - segB[0]) - (segB[1] - segA[0])

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_duration_spk_vs_lsn = 0
        overlap_duration_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        total_duration = pair_dict["Total duration"]
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentA, segmentB in segments.items():
            for segB in segmentB:
                segment_key = f"{segB}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    # Check if A is "spk" and B is "lsn"
                    if (segmentA[2].replace(" ", "") == "spk" and segB[2].replace(" ", "") == "lsn"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if b[0] > A[0] and b[1] < A[1]:
                                                    overlap_duration_spk_vs_lsn += b[1] - b[0]
                                                elif b[0] < A[0] and b[1] > A[1]:
                                                    overlap_duration_spk_vs_lsn += A[1] - A[0]
                                                elif b[0] < A[0] and b[1] < A[1]:
                                                    overlap_duration_spk_vs_lsn += b[1] - A[0]
                                                elif b[0] > A[0] and b[1] > A[1]:
                                                    overlap_duration_spk_vs_lsn += A[1] - b[0]
                    # Check if A is "lsn" and B is "spk"
                    elif (segmentA[2].replace(" ", "") == "lsn" and segB[2].replace(" ", "") == "spk"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if b[0] > A[0] and b[1] < A[1]:
                                                    overlap_duration_lsn_vs_spk += b[1] - b[0]
                                                elif b[0] < A[0] and b[1] > A[1]:
                                                    overlap_duration_lsn_vs_spk += A[1] - A[0]
                                                elif b[0] < A[0] and b[1] < A[1]:
                                                    overlap_duration_lsn_vs_spk += b[1] - A[0]
                                                elif b[0] > A[0] and b[1] > A[1]:
                                                    overlap_duration_lsn_vs_spk += A[1] - b[0]         
                break
        if total_duration != 0:
            percentage_spk_vs_lsn = overlap_duration_spk_vs_lsn / total_duration * 100
            percentage_lsn_vs_spk = overlap_duration_lsn_vs_spk / total_duration * 100
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Duration for A spk / B lsn - S&L': overlap_duration_spk_vs_lsn,
            'Overlap Percentage for A spk / B lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Duration for A lsn / B spk - S&L': overlap_duration_lsn_vs_spk,
            'Overlap Percentage for A lsn / B spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,CCDB,P12_P2_1402_dizzy.eaf_&_P12_P2_1402_monk.eaf,1835,1.595375,46640,40.54947
1,CCDB,P12_P3_2202_dizzy.eaf_&_P12_P3_2202_monk.eaf,0,0.0,6550,10.495113
2,CCDB,P14_P7_2502_dizzy.eaf_&_P14_P7_2502_monk.eaf,255,0.408719,4790,7.677512
3,CCDB,P15_P13_2402_dizzy.eaf_&_P15_P13_2402_monk.eaf,1980,2.07048,1700,1.777685
4,CCDB,P16_P6_0903_dizzy.eaf_&_P16_P6_0903_monk.eaf,540,0.78534,12790,18.600931
5,CCDB,P18_P10_2102_dizzy.eaf_&_P18_P10_2102_monk.eaf,5790,12.105373,13740,28.726741
6,CCDB,P18_P1_2102_dizzy.eaf_&_P18_P1_2102_monk.eaf,4915,8.522629,12110,20.998786






### IFADV

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,IFADV,DVA1A.eaf_&_DVB1B.eaf,500,0.50888,3940,4.009974
1,IFADV,DVA2C.eaf_&_DVB2D.eaf,2480,1.860047,32568,24.426611
2,IFADV,DVA3E.eaf_&_DVB3F.eaf,1140,1.901268,19830,33.072048
3,IFADV,DVA4C.eaf_&_DVB4G.eaf,1190,1.977401,6790,11.282818
4,IFADV,DVA5G.eaf_&_DVB5H.eaf,1980,3.105882,5700,8.941176
5,IFADV,DVA6H.eaf_&_DVB6I.eaf,1090,1.775533,3570,5.815279
6,IFADV,DVA7B.eaf_&_DVB7J.eaf,4925,8.213124,13740,22.913366
7,IFADV,DVA8K.eaf_&_DVB8L.eaf,980,2.640086,8890,23.949353






### NDC

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,NDC,13_1_A_M.eaf_&_13_1_B_F.eaf,0,0.0,1860,7.508477
1,NDC,13_2_A_M.eaf_&_13_2_B_F.eaf,6285,5.561258,22129,19.58076
2,NDC,13_4_A_M.eaf_&_13_4_B_F.eaf,7330,3.561174,38039,18.480695
3,NDC,14_1_A_M.eaf_&_14_1_B_F.eaf,8030,3.034655,1170,0.44216
4,NDC,14_2_A_M.eaf_&_14_2_B_F.eaf,9475,4.692336,2300,1.139037
5,NDC,17_1_A_F.eaf_&_17_1_B_F.eaf,7428,15.882994,1705,3.645733
6,NDC,17_2_A_F.eaf_&_17_2_B_F.eaf,2670,2.805476,36190,38.02629
7,NDC,17_3_A_F.eaf_&_17_3_B_F.eaf,2638,2.102193,9820,7.825449
8,NDC,17_4_A_F.eaf_&_17_4_B_F.eaf,3490,1.887364,61760,33.39931
9,NDC,18_1_A_M.eaf_&_18_1_B_M.eaf,20181,11.498621,15110,8.609294






-> Now the total duration correspond to the duration of only the sequences where there is an overlap. To explain more, to calculate the total duration, we only took the duration of the sequence where there is an overlap between the two person in the interaction and not all the duration of smiles of the video for example.

### Total duration come from the person B

In [18]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstA_tier[tier_A], lstB_tier[tier_B])
                overlapping_data[f"{tier_A} vs {tier_B}"] = {'Segments': overlapping_segments}


            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_duration_spk_vs_lsn = 0
        overlap_duration_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        duration = 0
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentA, segmentB in segments.items():
            for segB in segmentB:
                segment_key = f"{segB}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    # Check if A is "spk" and B is "lsn"
                    if (segmentA[2].replace(" ", "") == "spk" and segB[2].replace(" ", "") == "lsn"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                duration += b[1] - b[0]
                                                if b[0] > A[0] and b[1] < A[1]:
                                                    overlap_duration_spk_vs_lsn += b[1] - b[0]
                                                elif b[0] < A[0] and b[1] > A[1]:
                                                    overlap_duration_spk_vs_lsn += A[1] - A[0]
                                                elif b[0] < A[0] and b[1] < A[1]:
                                                    overlap_duration_spk_vs_lsn += b[1] - A[0]
                                                elif b[0] > A[0] and b[1] > A[1]:
                                                    overlap_duration_spk_vs_lsn += A[1] - b[0]
                    # Check if A is "lsn" and B is "spk"
                    elif (segmentA[2].replace(" ", "") == "lsn" and segB[2].replace(" ", "") == "spk"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                duration += b[1] - b[0]
                                                if b[0] > A[0] and b[1] < A[1]:
                                                    overlap_duration_lsn_vs_spk += b[1] - b[0]
                                                elif b[0] < A[0] and b[1] > A[1]:
                                                    overlap_duration_lsn_vs_spk += A[1] - A[0]
                                                elif b[0] < A[0] and b[1] < A[1]:
                                                    overlap_duration_lsn_vs_spk += b[1] - A[0]
                                                elif b[0] > A[0] and b[1] > A[1]:
                                                    overlap_duration_lsn_vs_spk += A[1] - b[0]         
                break
        if duration != 0 :
            percentage_spk_vs_lsn = overlap_duration_spk_vs_lsn / (duration) * 100
            percentage_lsn_vs_spk = overlap_duration_lsn_vs_spk / (duration) * 100
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Duration for A spk / B lsn - S&L': overlap_duration_spk_vs_lsn,
            'Overlap Percentage for A spk / B lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Duration for A lsn / B spk - S&L': overlap_duration_lsn_vs_spk,
            'Overlap Percentage for A lsn / B spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,CCDB,P12_P2_1402_dizzy.eaf_&_P12_P2_1402_monk.eaf,1835,2.092718,46640,53.190397
1,CCDB,P12_P3_2202_dizzy.eaf_&_P12_P3_2202_monk.eaf,0,0.0,6550,58.849955
2,CCDB,P14_P7_2502_dizzy.eaf_&_P14_P7_2502_monk.eaf,255,2.770234,4790,52.036936
3,CCDB,P15_P13_2402_dizzy.eaf_&_P15_P13_2402_monk.eaf,1980,5.889352,1700,5.056514
4,CCDB,P16_P6_0903_dizzy.eaf_&_P16_P6_0903_monk.eaf,540,1.456704,12790,34.502293
5,CCDB,P18_P10_2102_dizzy.eaf_&_P18_P10_2102_monk.eaf,5790,24.617347,13740,58.418367
6,CCDB,P18_P1_2102_dizzy.eaf_&_P18_P1_2102_monk.eaf,4915,11.867681,12110,29.240613






### IFADV

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,IFADV,DVA1A.eaf_&_DVB1B.eaf,500,4.258944,3940,33.560477
1,IFADV,DVA2C.eaf_&_DVB2D.eaf,2480,3.880761,32568,50.963148
2,IFADV,DVA3E.eaf_&_DVB3F.eaf,1140,4.225352,19830,73.498888
3,IFADV,DVA4C.eaf_&_DVB4G.eaf,1190,7.970529,6790,45.478902
4,IFADV,DVA5G.eaf_&_DVB5H.eaf,1980,12.313433,5700,35.447761
5,IFADV,DVA6H.eaf_&_DVB6I.eaf,1090,5.291262,3570,17.330097
6,IFADV,DVA7B.eaf_&_DVB7J.eaf,4925,14.88815,13740,41.535671
7,IFADV,DVA8K.eaf_&_DVB8L.eaf,980,7.286245,8890,66.096654






### NDC

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,NDC,13_1_A_M.eaf_&_13_1_B_F.eaf,0,0.0,1860,42.562929
1,NDC,13_2_A_M.eaf_&_13_2_B_F.eaf,6285,11.504036,22129,40.504823
2,NDC,13_4_A_M.eaf_&_13_4_B_F.eaf,7330,7.473491,38039,38.783646
3,NDC,14_1_A_M.eaf_&_14_1_B_F.eaf,8030,38.384321,1170,5.592734
4,NDC,14_2_A_M.eaf_&_14_2_B_F.eaf,9475,25.705372,2300,6.239826
5,NDC,17_1_A_F.eaf_&_17_1_B_F.eaf,7428,28.372804,1705,6.512605
6,NDC,17_2_A_F.eaf_&_17_2_B_F.eaf,2670,2.529847,36190,34.290316
7,NDC,17_3_A_F.eaf_&_17_3_B_F.eaf,2638,6.364909,9820,23.693481
8,NDC,17_4_A_F.eaf_&_17_4_B_F.eaf,3490,2.303326,61760,40.760296
9,NDC,18_1_A_M.eaf_&_18_1_B_M.eaf,20181,23.813794,15110,17.82996






### Total duration come from the person A

In [19]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstA_tier[tier_A], lstB_tier[tier_B])
                overlapping_data[f"{tier_A} vs {tier_B}"] = {'Segments': overlapping_segments}

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_duration_spk_vs_lsn = 0
        overlap_duration_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        duration = 0
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentA, segmentB in segments.items():
            for segB in segmentB:
                segment_key = f"{segB}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    # Check if A is "spk" and B is "lsn"
                    if (segmentA[2].replace(" ", "") == "spk" and segB[2].replace(" ", "") == "lsn"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                duration += A[1] - A[0]
                                                if b[0] > A[0] and b[1] < A[1]:
                                                    overlap_duration_spk_vs_lsn += b[1] - b[0]
                                                elif b[0] < A[0] and b[1] > A[1]:
                                                    overlap_duration_spk_vs_lsn += A[1] - A[0]
                                                elif b[0] < A[0] and b[1] < A[1]:
                                                    overlap_duration_spk_vs_lsn += b[1] - A[0]
                                                elif b[0] > A[0] and b[1] > A[1]:
                                                    overlap_duration_spk_vs_lsn += A[1] - b[0]
                    # Check if A is "lsn" and B is "spk"
                    elif (segmentA[2].replace(" ", "") == "lsn" and segB[2].replace(" ", "") == "spk"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                duration += A[1] - A[0]
                                                if b[0] > A[0] and b[1] < A[1]:
                                                    overlap_duration_lsn_vs_spk += b[1] - b[0]
                                                elif b[0] < A[0] and b[1] > A[1]:
                                                    overlap_duration_lsn_vs_spk += A[1] - A[0]
                                                elif b[0] < A[0] and b[1] < A[1]:
                                                    overlap_duration_lsn_vs_spk += b[1] - A[0]
                                                elif b[0] > A[0] and b[1] > A[1]:
                                                    overlap_duration_lsn_vs_spk += A[1] - b[0]         
                break
        if duration != 0 :
            percentage_spk_vs_lsn = overlap_duration_spk_vs_lsn / (duration) * 100
            percentage_lsn_vs_spk = overlap_duration_lsn_vs_spk / (duration) * 100
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Duration for A spk / B lsn - S&L': overlap_duration_spk_vs_lsn,
            'Overlap Percentage for A spk / B lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Duration for A lsn / B spk - S&L': overlap_duration_lsn_vs_spk,
            'Overlap Percentage for A lsn / B spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,CCDB,P12_P2_1402_dizzy.eaf_&_P12_P2_1402_monk.eaf,1835,1.239404,46640,31.501807
1,CCDB,P12_P3_2202_dizzy.eaf_&_P12_P3_2202_monk.eaf,0,0.0,6550,53.776683
2,CCDB,P14_P7_2502_dizzy.eaf_&_P14_P7_2502_monk.eaf,255,1.486447,4790,27.921889
3,CCDB,P15_P13_2402_dizzy.eaf_&_P15_P13_2402_monk.eaf,1980,53.804348,1700,46.195652
4,CCDB,P16_P6_0903_dizzy.eaf_&_P16_P6_0903_monk.eaf,540,1.53148,12790,36.273398
5,CCDB,P18_P10_2102_dizzy.eaf_&_P18_P10_2102_monk.eaf,5790,7.685161,13740,18.237324
6,CCDB,P18_P1_2102_dizzy.eaf_&_P18_P1_2102_monk.eaf,4915,8.568689,12110,21.112273






### IFADV

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,IFADV,DVA1A.eaf_&_DVB1B.eaf,500,3.320053,3940,26.162019
1,IFADV,DVA2C.eaf_&_DVB2D.eaf,2480,2.583199,32568,33.923233
2,IFADV,DVA3E.eaf_&_DVB3F.eaf,1140,1.257584,19830,21.875345
3,IFADV,DVA4C.eaf_&_DVB4G.eaf,1190,4.099208,6790,23.389597
4,IFADV,DVA5G.eaf_&_DVB5H.eaf,1980,8.108108,5700,23.341523
5,IFADV,DVA6H.eaf_&_DVB6I.eaf,1090,11.307054,3570,37.033195
6,IFADV,DVA7B.eaf_&_DVB7J.eaf,4925,12.812175,13740,35.744017
7,IFADV,DVA8K.eaf_&_DVB8L.eaf,980,1.677508,8890,15.217391






### NDC

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,NDC,13_1_A_M.eaf_&_13_1_B_F.eaf,0,0.0,1860,100.0
1,NDC,13_2_A_M.eaf_&_13_2_B_F.eaf,6285,7.858706,22129,27.669897
2,NDC,13_4_A_M.eaf_&_13_4_B_F.eaf,7330,6.46527,38039,33.551488
3,NDC,14_1_A_M.eaf_&_14_1_B_F.eaf,8030,38.820401,1170,5.656273
4,NDC,14_2_A_M.eaf_&_14_2_B_F.eaf,9475,36.597142,2300,8.883739
5,NDC,17_1_A_F.eaf_&_17_1_B_F.eaf,7428,29.70012,1705,6.817273
6,NDC,17_2_A_F.eaf_&_17_2_B_F.eaf,2670,2.959105,36190,40.108611
7,NDC,17_3_A_F.eaf_&_17_3_B_F.eaf,2638,12.447506,9820,46.336054
8,NDC,17_4_A_F.eaf_&_17_4_B_F.eaf,3490,0.959767,61760,16.984297
9,NDC,18_1_A_M.eaf_&_18_1_B_M.eaf,20181,20.557616,15110,15.391981






### Total duration is the union of the duration of person A and person B

In [20]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_A}_&_{pair_file_B}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstA_tier[tier_A], lstB_tier[tier_B])
                overlapping_data[f"{tier_A} vs {tier_B}"] = {'Segments': overlapping_segments}

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_duration_spk_vs_lsn = 0
        overlap_duration_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        duration = 0
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentA, segmentB in segments.items():
            for segB in segmentB:
                segment_key = f"{segB}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    # Check if A is "spk" and B is "lsn"
                    if (segmentA[2].replace(" ", "") == "spk" and segB[2].replace(" ", "") == "lsn"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if b[0] > A[0] and b[1] < A[1]:
                                                    overlap_duration_spk_vs_lsn += b[1] - b[0]
                                                    duration += A[1] - A[0]
                                                elif b[0] < A[0] and b[1] > A[1]:
                                                    overlap_duration_spk_vs_lsn += A[1] - A[0]
                                                    duration += b[1] - b[0]
                                                elif b[0] < A[0] and b[1] < A[1]:
                                                    overlap_duration_spk_vs_lsn += b[1] - A[0]
                                                    duration += (A[1] - b[0]) - (b[1] - A[0])
                                                elif b[0] > A[0] and b[1] > A[1]:
                                                    overlap_duration_spk_vs_lsn += A[1] - b[0]
                                                    duration += (b[1] - A[0]) - (A[1] - b[0])
                    # Check if A is "lsn" and B is "spk"
                    elif (segmentA[2].replace(" ", "") == "lsn" and segB[2].replace(" ", "") == "spk"):
                        for tierA, tierB in expression:
                            segments_tier = pair_dict[f"{tierA} vs {tierB}"]["Segments"]   
                            for A, B in segments_tier.items():
                                if A[0] < segB[1] and A[1] > segB[0]:
                                    for b in B:
                                        if b[0] < segB[1] and b[1] > segB[0]:
                                            tier_key = f"{b}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if b[0] > A[0] and b[1] < A[1]:
                                                    overlap_duration_lsn_vs_spk += b[1] - b[0]
                                                    duration += A[1] - A[0]
                                                elif b[0] < A[0] and b[1] > A[1]:
                                                    overlap_duration_lsn_vs_spk += A[1] - A[0]
                                                    duration += b[1] - b[0]
                                                elif b[0] < A[0] and b[1] < A[1]:
                                                    overlap_duration_lsn_vs_spk += b[1] - A[0]
                                                    duration += (A[1] - b[0]) - (b[1] - A[0])
                                                elif b[0] > A[0] and b[1] > A[1]:
                                                    overlap_duration_lsn_vs_spk += A[1] - b[0]    
                                                    duration += (b[1] - A[0]) - (A[1] - b[0])     
                break
        if duration != 0 :
            percentage_spk_vs_lsn = overlap_duration_spk_vs_lsn / (duration) * 100
            percentage_lsn_vs_spk = overlap_duration_lsn_vs_spk / (duration) * 100
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Duration for A spk / B lsn - S&L': overlap_duration_spk_vs_lsn,
            'Overlap Percentage for A spk / B lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Duration for A lsn / B spk - S&L': overlap_duration_lsn_vs_spk,
            'Overlap Percentage for A lsn / B spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,CCDB,P12_P2_1402_dizzy.eaf_&_P12_P2_1402_monk.eaf,1835,1.128988,46640,28.695358
1,CCDB,P12_P3_2202_dizzy.eaf_&_P12_P3_2202_monk.eaf,0,0.0,6550,64.152791
2,CCDB,P14_P7_2502_dizzy.eaf_&_P14_P7_2502_monk.eaf,255,1.305015,4790,24.513818
3,CCDB,P15_P13_2402_dizzy.eaf_&_P15_P13_2402_monk.eaf,1980,5.889352,1700,5.056514
4,CCDB,P16_P6_0903_dizzy.eaf_&_P16_P6_0903_monk.eaf,540,1.079029,12790,25.556999
5,CCDB,P18_P10_2102_dizzy.eaf_&_P18_P10_2102_monk.eaf,5790,8.464294,13740,20.086251
6,CCDB,P18_P1_2102_dizzy.eaf_&_P18_P1_2102_monk.eaf,4915,6.519432,12110,16.063138






### IFADV

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,IFADV,DVA1A.eaf_&_DVB1B.eaf,500,2.319109,3940,18.274583
1,IFADV,DVA2C.eaf_&_DVB2D.eaf,2480,2.19306,32568,28.79983
2,IFADV,DVA3E.eaf_&_DVB3F.eaf,1140,1.246174,19830,21.676869
3,IFADV,DVA4C.eaf_&_DVB4G.eaf,1190,3.833763,6790,21.875
4,IFADV,DVA5G.eaf_&_DVB5H.eaf,1980,6.989058,5700,20.120014
5,IFADV,DVA6H.eaf_&_DVB6I.eaf,1090,4.41117,3570,14.447592
6,IFADV,DVA7B.eaf_&_DVB7J.eaf,4925,12.534996,13740,34.97073
7,IFADV,DVA8K.eaf_&_DVB8L.eaf,980,1.633606,8890,14.819137






### NDC

Unnamed: 0,Database,Pair,Overlap Duration for A spk / B lsn - S&L,Overlap Percentage for A spk / B lsn - S&L,Overlap Duration for A lsn / B spk - S&L,Overlap Percentage for A lsn / B spk - S&L
0,NDC,13_1_A_M.eaf_&_13_1_B_F.eaf,0,0.0,1860,42.562929
1,NDC,13_2_A_M.eaf_&_13_2_B_F.eaf,6285,6.739585,22129,23.729559
2,NDC,13_4_A_M.eaf_&_13_4_B_F.eaf,7330,5.308901,38039,27.550518
3,NDC,14_1_A_M.eaf_&_14_1_B_F.eaf,8030,29.979466,1170,4.368116
4,NDC,14_2_A_M.eaf_&_14_2_B_F.eaf,9475,21.301709,2300,5.170863
5,NDC,17_1_A_F.eaf_&_17_1_B_F.eaf,7428,21.022245,1705,4.825381
6,NDC,17_2_A_F.eaf_&_17_2_B_F.eaf,2670,1.900356,36190,25.758007
7,NDC,17_3_A_F.eaf_&_17_3_B_F.eaf,2638,6.120224,9820,22.782637
8,NDC,17_4_A_F.eaf_&_17_4_B_F.eaf,3490,0.817675,61760,14.4698
9,NDC,18_1_A_M.eaf_&_18_1_B_M.eaf,20181,15.398761,15110,11.529423






Now, we study the overlap between the person B to person A for the same 3 ways of calculating the total duration.

-> The total duration correspond to the duration of the sequences of the tier concerned during all the video/file.

### Total duration come from the person A

In [21]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_B}_&_{pair_file_A}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstB_tier[tier_B], lstA_tier[tier_A])
                overlapping_data[f"{tier_B} vs {tier_A}"] = {'Segments': overlapping_segments}

                overlapping_data[f"{tier_A} duration in lstA"] = 0

                tiers_in_lstA = set(lstA_tier[tier_A])
                for seg in tiers_in_lstA:
                    overlapping_data[f"{tier_A} duration in lstA"] += seg[1] - seg[0]

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_duration_spk_vs_lsn = 0
        overlap_duration_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        duration_smiles = pair_dict["Smiles_0 duration in lstA"]
        duration_laughs = pair_dict["Laughs_0 duration in lstA"]
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentB, segmentA in segments.items():
            for segA in segmentA:
                segment_key = f"{segA}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    # Check if B is "spk" and A is "lsn"
                    if (segmentB[2].replace(" ", "") == "spk" and segA[2].replace(" ", "") == "lsn"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if a[0] > B[0] and a[1] < B[1]:
                                                    overlap_duration_spk_vs_lsn += a[1] - a[0]
                                                elif a[0] < B[0] and a[1] > B[1]:
                                                    overlap_duration_spk_vs_lsn += B[1] - B[0]
                                                elif a[0] < B[0] and a[1] < B[1]:
                                                    overlap_duration_spk_vs_lsn += a[1] - B[0]
                                                elif a[0] > B[0] and a[1] > B[1]:
                                                    overlap_duration_spk_vs_lsn += B[1] - a[0]
                    # Check if B is "lsn" and A is "spk"
                    elif (segmentB[2].replace(" ", "") == "lsn" and segA[2].replace(" ", "") == "spk"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if a[0] > B[0] and a[1] < B[1]:
                                                    overlap_duration_lsn_vs_spk += a[1] - a[0]
                                                elif a[0] < B[0] and a[1] > B[1]:
                                                    overlap_duration_lsn_vs_spk += B[1] - B[0]
                                                elif a[0] < B[0] and a[1] < B[1]:
                                                    overlap_duration_lsn_vs_spk += a[1] - B[0]
                                                elif a[0] > B[0] and a[1] > B[1]:
                                                    overlap_duration_lsn_vs_spk += B[1] - a[0]         
                break
        if duration_smiles != 0 or duration_laughs !=0 :
            percentage_spk_vs_lsn = overlap_duration_spk_vs_lsn / (duration_smiles + duration_laughs) * 100
            percentage_lsn_vs_spk = overlap_duration_lsn_vs_spk / (duration_smiles + duration_laughs) * 100
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Duration for B spk / A lsn - S&L': overlap_duration_spk_vs_lsn,
            'Overlap Percentage for B spk / A lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Duration for B lsn / A spk - S&L': overlap_duration_lsn_vs_spk,
            'Overlap Percentage for B lsn / A spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,CCDB,P12_P2_1402_monk.eaf_&_P12_P2_1402_dizzy.eaf,2690,2.49513,30430,28.225582
1,CCDB,P12_P3_2202_monk.eaf_&_P12_P3_2202_dizzy.eaf,0,0.0,22013,37.909003
2,CCDB,P14_P7_2502_monk.eaf_&_P14_P7_2502_dizzy.eaf,7545,8.908436,13233,15.624299
3,CCDB,P15_P13_2402_monk.eaf_&_P15_P13_2402_dizzy.eaf,5215,7.368421,1130,1.596609
4,CCDB,P16_P6_0903_monk.eaf_&_P16_P6_0903_dizzy.eaf,0,0.0,8430,24.071959
5,CCDB,P18_P10_2102_monk.eaf_&_P18_P10_2102_dizzy.eaf,0,0.0,18140,31.125601
6,CCDB,P18_P1_2102_monk.eaf_&_P18_P1_2102_dizzy.eaf,3345,5.497124,12870,21.15037






### IFADV

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,IFADV,DVB1B.eaf_&_DVA1A.eaf,605,1.069093,25795,45.582258
1,IFADV,DVB2D.eaf_&_DVA2C.eaf,140,0.147392,34095,35.895141
2,IFADV,DVB3F.eaf_&_DVA3E.eaf,0,0.0,3245,11.426056
3,IFADV,DVB4G.eaf_&_DVA4C.eaf,13960,25.847065,8470,15.682281
4,IFADV,DVB5H.eaf_&_DVA5G.eaf,0,0.0,6950,29.777207
5,IFADV,DVB6I.eaf_&_DVA6H.eaf,3695,14.178818,13890,53.300077
6,IFADV,DVB7J.eaf_&_DVA7B.eaf,11070,18.866638,6700,11.418833
7,IFADV,DVB8L.eaf_&_DVA8K.eaf,0,0.0,9960,44.743935






### NDC

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,NDC,13_1_B_F.eaf_&_13_1_A_M.eaf,2880,10.846641,3660,13.784272
1,NDC,13_2_B_F.eaf_&_13_2_A_M.eaf,6800,14.53737,2695,5.761502
2,NDC,13_4_B_F.eaf_&_13_4_A_M.eaf,9020,11.253478,8552,10.669594
3,NDC,14_1_B_F.eaf_&_14_1_A_M.eaf,850,2.414773,0,0.0
4,NDC,14_2_B_F.eaf_&_14_2_A_M.eaf,9835,17.805094,14070,25.472057
5,NDC,17_1_B_F.eaf_&_17_1_A_F.eaf,2875,7.745777,9097,24.508985
6,NDC,17_2_B_F.eaf_&_17_2_A_F.eaf,46800,51.815192,5711,6.323004
7,NDC,17_3_B_F.eaf_&_17_3_A_F.eaf,7990,25.431281,4675,14.880005
8,NDC,17_4_B_F.eaf_&_17_4_A_F.eaf,6940,5.446683,5520,4.332232
9,NDC,18_1_B_M.eaf_&_18_1_A_M.eaf,5610,5.715567,16650,16.963312






### Total duration come from the person B

In [22]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_B}_&_{pair_file_A}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstB_tier[tier_B], lstA_tier[tier_A])
                overlapping_data[f"{tier_B} vs {tier_A}"] = {'Segments': overlapping_segments}

                overlapping_data[f"{tier_B} duration in lstB"] = 0

                tiers_in_lstB = set(lstB_tier[tier_B])
                for seg in tiers_in_lstB:
                    overlapping_data[f"{tier_B} duration in lstB"] += seg[1] - seg[0]

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_duration_spk_vs_lsn = 0
        overlap_duration_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        duration_smiles = pair_dict["Smiles_0 duration in lstB"]
        duration_laughs = pair_dict["Laughs_0 duration in lstB"]
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentB, segmentA in segments.items():
            for segA in segmentA:
                segment_key = f"{segA}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    # Check if B is "spk" and A is "lsn"
                    if (segmentB[2].replace(" ", "") == "spk" and segA[2].replace(" ", "") == "lsn"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if a[0] > B[0] and a[1] < B[1]:
                                                    overlap_duration_spk_vs_lsn += a[1] - a[0]
                                                elif a[0] < B[0] and a[1] > B[1]:
                                                    overlap_duration_spk_vs_lsn += B[1] - B[0]
                                                elif a[0] < B[0] and a[1] < B[1]:
                                                    overlap_duration_spk_vs_lsn += a[1] - B[0]
                                                elif a[0] > B[0] and a[1] > B[1]:
                                                    overlap_duration_spk_vs_lsn += B[1] - a[0]
                    # Check if B is "lsn" and A is "spk"
                    elif (segmentB[2].replace(" ", "") == "lsn" and segA[2].replace(" ", "") == "spk"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if a[0] > B[0] and a[1] < B[1]:
                                                    overlap_duration_lsn_vs_spk += a[1] - a[0]
                                                elif a[0] < B[0] and a[1] > B[1]:
                                                    overlap_duration_lsn_vs_spk += B[1] - B[0]
                                                elif a[0] < B[0] and a[1] < B[1]:
                                                    overlap_duration_lsn_vs_spk += a[1] - B[0]
                                                elif a[0] > B[0] and a[1] > B[1]:
                                                    overlap_duration_lsn_vs_spk += B[1] - a[0]         
                break
        if duration_smiles != 0 or duration_laughs !=0 :
            percentage_spk_vs_lsn = overlap_duration_spk_vs_lsn / (duration_smiles + duration_laughs) * 100
            percentage_lsn_vs_spk = overlap_duration_lsn_vs_spk / (duration_smiles + duration_laughs) * 100
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Duration for B spk / A lsn - S&L': overlap_duration_spk_vs_lsn,
            'Overlap Percentage for B spk / A lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Duration for B lsn / A spk - S&L': overlap_duration_lsn_vs_spk,
            'Overlap Percentage for B lsn / A spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,CCDB,P12_P2_1402_monk.eaf_&_P12_P2_1402_dizzy.eaf,2690,2.202301,30430,24.913013
1,CCDB,P12_P3_2202_monk.eaf_&_P12_P3_2202_dizzy.eaf,0,0.0,22013,20.233839
2,CCDB,P14_P7_2502_monk.eaf_&_P14_P7_2502_dizzy.eaf,7545,11.445692,13233,20.074333
3,CCDB,P15_P13_2402_monk.eaf_&_P15_P13_2402_dizzy.eaf,5215,8.690508,1130,1.883082
4,CCDB,P16_P6_0903_monk.eaf_&_P16_P6_0903_dizzy.eaf,0,0.0,8430,15.443803
5,CCDB,P18_P10_2102_monk.eaf_&_P18_P10_2102_dizzy.eaf,0,0.0,18140,55.627108
6,CCDB,P18_P1_2102_monk.eaf_&_P18_P1_2102_dizzy.eaf,3345,6.556895,12870,25.227874






### IFADV

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,IFADV,DVB1B.eaf_&_DVA1A.eaf,605,0.600258,25795,25.592817
1,IFADV,DVB2D.eaf_&_DVA2C.eaf,140,0.142617,34095,34.732338
2,IFADV,DVB3F.eaf_&_DVA3E.eaf,0,0.0,3245,4.261047
3,IFADV,DVB4G.eaf_&_DVA4C.eaf,13960,18.941655,8470,11.492537
4,IFADV,DVB5H.eaf_&_DVA5G.eaf,0,0.0,6950,12.368749
5,IFADV,DVB6I.eaf_&_DVA6H.eaf,3695,7.76016,13890,29.17148
6,IFADV,DVB7J.eaf_&_DVA7B.eaf,11070,9.226922,6700,5.584497
7,IFADV,DVB8L.eaf_&_DVA8K.eaf,0,0.0,9960,11.902486






### NDC

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,NDC,13_1_B_F.eaf_&_13_1_A_M.eaf,2880,2.590021,3660,3.291485
1,NDC,13_2_B_F.eaf_&_13_2_A_M.eaf,6800,6.887052,2695,2.729501
2,NDC,13_4_B_F.eaf_&_13_4_A_M.eaf,9020,4.077666,8552,3.866097
3,NDC,14_1_B_F.eaf_&_14_1_A_M.eaf,850,1.06851,0,0.0
4,NDC,14_2_B_F.eaf_&_14_2_A_M.eaf,9835,7.118301,14070,10.183476
5,NDC,17_1_B_F.eaf_&_17_1_A_F.eaf,2875,6.3123,9097,19.973214
6,NDC,17_2_B_F.eaf_&_17_2_A_F.eaf,46800,43.943662,5711,5.362441
7,NDC,17_3_B_F.eaf_&_17_3_A_F.eaf,7990,10.199002,4675,5.967501
8,NDC,17_4_B_F.eaf_&_17_4_A_F.eaf,6940,3.78315,5520,3.009076
9,NDC,18_1_B_M.eaf_&_18_1_A_M.eaf,5610,5.229355,16650,15.520279






### Total duration come from the union of the duration of person A and person B

In [23]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_B}_&_{pair_file_A}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstB_tier[tier_B], lstA_tier[tier_A])
                overlapping_data[f"{tier_B} vs {tier_A}"] = {'Segments': overlapping_segments}

                overlapping_data[f"Total duration"] = 0
                tiers_in_lstA = set(lstA_tier[tier_A])
                tiers_in_lstB = set(lstB_tier[tier_B])
                for segA, segB in zip(tiers_in_lstA, tiers_in_lstB):
                    if segA[0] < segB[0] and segA[1] > segB[1]:
                        overlapping_data[f"Total duration"] += segA[1] - segA[0]
                    elif segB[0] < segA[1] and segB[1] > segA[0]:
                        overlapping_data[f"Total duration"] += segB[1] - segB[0]
                    elif segA[0] > segB[1] or segA[1] < segB[0]:
                        overlapping_data[f"Total duration"] += segA[1] - segA[0]
                    elif segB[0] > segA[1] or segB[1] < segA[0]:
                        overlapping_data[f"Total duration"] += segB[1] - segB[0]
                    elif segA[0] < segB[0] and segA[1] < segB[1]:
                        overlapping_data[f"Total duration"] += (segB[1] - segA[0]) - (segA[1] - segB[0])
                    elif segB[0] < segA[0] and segB[1] < segA[1]:
                        overlapping_data[f"Total duration"] += (segA[1] - segB[0]) - (segB[1] - segA[0])

            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_duration_spk_vs_lsn = 0
        overlap_duration_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        total_duration = pair_dict["Total duration"]
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentB, segmentA in segments.items():
            for segA in segmentA:
                segment_key = f"{segA}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    # Check if B is "spk" and A is "lsn"
                    if (segmentB[2].replace(" ", "") == "spk" and segA[2].replace(" ", "") == "lsn"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if a[0] > B[0] and a[1] < B[1]:
                                                    overlap_duration_spk_vs_lsn += a[1] - a[0]
                                                elif a[0] < B[0] and a[1] > B[1]:
                                                    overlap_duration_spk_vs_lsn += B[1] - B[0]
                                                elif a[0] < B[0] and a[1] < B[1]:
                                                    overlap_duration_spk_vs_lsn += a[1] - B[0]
                                                elif a[0] > B[0] and a[1] > B[1]:
                                                    overlap_duration_spk_vs_lsn += B[1] - a[0]
                    # Check if B is "lsn" and A is "spk"
                    elif (segmentB[2].replace(" ", "") == "lsn" and segA[2].replace(" ", "") == "spk"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if a[0] > B[0] and a[1] < B[1]:
                                                    overlap_duration_lsn_vs_spk += a[1] - a[0]
                                                elif a[0] < B[0] and a[1] > B[1]:
                                                    overlap_duration_lsn_vs_spk += B[1] - B[0]
                                                elif a[0] < B[0] and a[1] < B[1]:
                                                    overlap_duration_lsn_vs_spk += a[1] - B[0]
                                                elif a[0] > B[0] and a[1] > B[1]:
                                                    overlap_duration_lsn_vs_spk += B[1] - a[0]         
                break
        if total_duration != 0:
            percentage_spk_vs_lsn = overlap_duration_spk_vs_lsn / total_duration * 100
            percentage_lsn_vs_spk = overlap_duration_lsn_vs_spk / total_duration * 100
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Duration for B spk / A lsn - S&L': overlap_duration_spk_vs_lsn,
            'Overlap Percentage for B spk / A lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Duration for B lsn / A spk - S&L': overlap_duration_lsn_vs_spk,
            'Overlap Percentage for B lsn / A spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,CCDB,P12_P2_1402_monk.eaf_&_P12_P2_1402_dizzy.eaf,2690,2.338724,30430,26.456268
1,CCDB,P12_P3_2202_monk.eaf_&_P12_P3_2202_dizzy.eaf,0,0.0,22013,35.271591
2,CCDB,P14_P7_2502_monk.eaf_&_P14_P7_2502_dizzy.eaf,7545,12.093284,13233,21.21013
3,CCDB,P15_P13_2402_monk.eaf_&_P15_P13_2402_dizzy.eaf,5215,5.45331,1130,1.181638
4,CCDB,P16_P6_0903_monk.eaf_&_P16_P6_0903_dizzy.eaf,0,0.0,8430,12.260035
5,CCDB,P18_P10_2102_monk.eaf_&_P18_P10_2102_dizzy.eaf,0,0.0,18140,37.925988
6,CCDB,P18_P1_2102_monk.eaf_&_P18_P1_2102_dizzy.eaf,3345,5.800243,12870,22.316629






### IFADV

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,IFADV,DVB1B.eaf_&_DVA1A.eaf,605,0.615745,25795,26.253117
1,IFADV,DVB2D.eaf_&_DVA2C.eaf,140,0.105003,34095,25.571889
2,IFADV,DVB3F.eaf_&_DVA3E.eaf,0,0.0,3245,5.411941
3,IFADV,DVB4G.eaf_&_DVA4C.eaf,13960,23.197075,8470,14.074443
4,IFADV,DVB5H.eaf_&_DVA5G.eaf,0,0.0,6950,10.901961
5,IFADV,DVB6I.eaf_&_DVA6H.eaf,3695,6.018896,13890,22.625835
6,IFADV,DVB7J.eaf_&_DVA7B.eaf,11070,18.460769,6700,11.173184
7,IFADV,DVB8L.eaf_&_DVA8K.eaf,0,0.0,9960,26.831897






### NDC

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,NDC,13_1_B_F.eaf_&_13_1_A_M.eaf,2880,11.626029,3660,14.774746
1,NDC,13_2_B_F.eaf_&_13_2_A_M.eaf,6800,6.016954,2695,2.38466
2,NDC,13_4_B_F.eaf_&_13_4_A_M.eaf,9020,4.382236,8552,4.154865
3,NDC,14_1_B_F.eaf_&_14_1_A_M.eaf,850,0.321227,0,0.0
4,NDC,14_2_B_F.eaf_&_14_2_A_M.eaf,9835,4.87062,14070,6.967934
5,NDC,17_1_B_F.eaf_&_17_1_A_F.eaf,2875,6.147497,9097,19.45175
6,NDC,17_2_B_F.eaf_&_17_2_A_F.eaf,46800,49.174644,5711,6.000778
7,NDC,17_3_B_F.eaf_&_17_3_A_F.eaf,7990,6.367143,4675,3.725456
8,NDC,17_4_B_F.eaf_&_17_4_A_F.eaf,6940,3.753096,5520,2.985171
9,NDC,18_1_B_M.eaf_&_18_1_A_M.eaf,5610,3.196435,16650,9.486747






-> Now the total duration correspond to the duration of only the sequences where there is an overlap. To explain more, to calculate the total duration, we only took the duration of the sequence where there is an overlap between the two person in the interaction and not all the duration of smiles of the video for example.

### Total duration come from the person A

In [24]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_B}_&_{pair_file_A}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstB_tier[tier_B], lstA_tier[tier_A])
                overlapping_data[f"{tier_B} vs {tier_A}"] = {'Segments': overlapping_segments}


            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_duration_spk_vs_lsn = 0
        overlap_duration_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        duration = 0
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentB, segmentA in segments.items():
            for segA in segmentA:
                segment_key = f"{segA}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    # Check if B is "spk" and A is "lsn"
                    if (segmentB[2].replace(" ", "") == "spk" and segA[2].replace(" ", "") == "lsn"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                duration += a[1] - a[0]
                                                if a[0] > B[0] and a[1] < B[1]:
                                                    overlap_duration_spk_vs_lsn += a[1] - a[0]
                                                elif a[0] < B[0] and a[1] > B[1]:
                                                    overlap_duration_spk_vs_lsn += B[1] - B[0]
                                                elif a[0] < B[0] and a[1] < B[1]:
                                                    overlap_duration_spk_vs_lsn += a[1] - B[0]
                                                elif a[0] > B[0] and a[1] > B[1]:
                                                    overlap_duration_spk_vs_lsn += B[1] - a[0]
                    # Check if B is "lsn" and A is "spk"
                    elif (segmentB[2].replace(" ", "") == "lsn" and segA[2].replace(" ", "") == "spk"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                duration += a[1] - a[0]
                                                if a[0] > B[0] and a[1] < B[1]:
                                                    overlap_duration_lsn_vs_spk += a[1] - a[0]
                                                elif a[0] < B[0] and a[1] > B[1]:
                                                    overlap_duration_lsn_vs_spk += B[1] - B[0]
                                                elif a[0] < B[0] and a[1] < B[1]:
                                                    overlap_duration_lsn_vs_spk += a[1] - B[0]
                                                elif a[0] > B[0] and a[1] > B[1]:
                                                    overlap_duration_lsn_vs_spk += B[1] - a[0]         
                break
        if duration != 0 :
            percentage_spk_vs_lsn = overlap_duration_spk_vs_lsn / (duration) * 100
            percentage_lsn_vs_spk = overlap_duration_lsn_vs_spk / (duration) * 100
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Duration for B spk / A lsn - S&L': overlap_duration_spk_vs_lsn,
            'Overlap Percentage for B spk / A lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Duration for B lsn / A spk - S&L': overlap_duration_lsn_vs_spk,
            'Overlap Percentage for B lsn / A spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,CCDB,P12_P2_1402_monk.eaf_&_P12_P2_1402_dizzy.eaf,2690,3.554205,30430,40.206117
1,CCDB,P12_P3_2202_monk.eaf_&_P12_P3_2202_dizzy.eaf,0,0.0,22013,46.863092
2,CCDB,P14_P7_2502_monk.eaf_&_P14_P7_2502_dizzy.eaf,7545,17.328893,13233,30.392742
3,CCDB,P15_P13_2402_monk.eaf_&_P15_P13_2402_dizzy.eaf,5215,45.426829,1130,9.843206
4,CCDB,P16_P6_0903_monk.eaf_&_P16_P6_0903_dizzy.eaf,0,0.0,8430,91.830065
5,CCDB,P18_P10_2102_monk.eaf_&_P18_P10_2102_dizzy.eaf,0,0.0,18140,54.803625
6,CCDB,P18_P1_2102_monk.eaf_&_P18_P1_2102_dizzy.eaf,3345,8.608931,12870,33.12315






### IFADV

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,IFADV,DVB1B.eaf_&_DVA1A.eaf,605,1.411573,25795,60.184321
1,IFADV,DVB2D.eaf_&_DVA2C.eaf,140,0.215302,34095,52.433679
2,IFADV,DVB3F.eaf_&_DVA3E.eaf,0,0.0,3245,63.255361
3,IFADV,DVB4G.eaf_&_DVA4C.eaf,13960,27.903258,8470,16.929842
4,IFADV,DVB5H.eaf_&_DVA5G.eaf,0,0.0,6950,45.159194
5,IFADV,DVB6I.eaf_&_DVA6H.eaf,3695,15.824411,13890,59.486081
6,IFADV,DVB7J.eaf_&_DVA7B.eaf,11070,30.652084,6700,18.551848
7,IFADV,DVB8L.eaf_&_DVA8K.eaf,0,0.0,9960,50.44315






### NDC

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,NDC,13_1_B_F.eaf_&_13_1_A_M.eaf,2880,33.141542,3660,42.117376
1,NDC,13_2_B_F.eaf_&_13_2_A_M.eaf,6800,29.941438,2695,11.866496
2,NDC,13_4_B_F.eaf_&_13_4_A_M.eaf,9020,28.710571,8552,27.220931
3,NDC,14_1_B_F.eaf_&_14_1_A_M.eaf,850,72.649573,0,0.0
4,NDC,14_2_B_F.eaf_&_14_2_A_M.eaf,9835,27.770719,14070,39.728928
5,NDC,17_1_B_F.eaf_&_17_1_A_F.eaf,2875,8.850239,9097,28.003694
6,NDC,17_2_B_F.eaf_&_17_2_A_F.eaf,46800,60.810021,5711,7.420642
7,NDC,17_3_B_F.eaf_&_17_3_A_F.eaf,7990,35.393134,4675,20.708749
8,NDC,17_4_B_F.eaf_&_17_4_A_F.eaf,6940,31.121076,5520,24.753363
9,NDC,18_1_B_M.eaf_&_18_1_A_M.eaf,5610,10.868933,16650,32.258065






### Total duration come from the person B

In [25]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_B}_&_{pair_file_A}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstB_tier[tier_B], lstA_tier[tier_A])
                overlapping_data[f"{tier_B} vs {tier_A}"] = {'Segments': overlapping_segments}


            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_duration_spk_vs_lsn = 0
        overlap_duration_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        duration = 0
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentB, segmentA in segments.items():
            for segA in segmentA:
                segment_key = f"{segA}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    # Check if B is "spk" and A is "lsn"
                    if (segmentB[2].replace(" ", "") == "spk" and segA[2].replace(" ", "") == "lsn"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                duration += B[1] - B[0]
                                                if a[0] > B[0] and a[1] < B[1]:
                                                    overlap_duration_spk_vs_lsn += a[1] - a[0]
                                                elif a[0] < B[0] and a[1] > B[1]:
                                                    overlap_duration_spk_vs_lsn += B[1] - B[0]
                                                elif a[0] < B[0] and a[1] < B[1]:
                                                    overlap_duration_spk_vs_lsn += a[1] - B[0]
                                                elif a[0] > B[0] and a[1] > B[1]:
                                                    overlap_duration_spk_vs_lsn += B[1] - a[0]
                    # Check if B is "lsn" and A is "spk"
                    elif (segmentB[2].replace(" ", "") == "lsn" and segA[2].replace(" ", "") == "spk"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                duration += B[1] - B[0]
                                                if a[0] > B[0] and a[1] < B[1]:
                                                    overlap_duration_lsn_vs_spk += a[1] - a[0]
                                                elif a[0] < B[0] and a[1] > B[1]:
                                                    overlap_duration_lsn_vs_spk += B[1] - B[0]
                                                elif a[0] < B[0] and a[1] < B[1]:
                                                    overlap_duration_lsn_vs_spk += a[1] - B[0]
                                                elif a[0] > B[0] and a[1] > B[1]:
                                                    overlap_duration_lsn_vs_spk += B[1] - a[0]         
                break
        if duration != 0 :
            percentage_spk_vs_lsn = overlap_duration_spk_vs_lsn / (duration) * 100
            percentage_lsn_vs_spk = overlap_duration_lsn_vs_spk / (duration) * 100
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Duration for B spk / A lsn - S&L': overlap_duration_spk_vs_lsn,
            'Overlap Percentage for B spk / A lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Duration for B lsn / A spk - S&L': overlap_duration_lsn_vs_spk,
            'Overlap Percentage for B lsn / A spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,CCDB,P12_P2_1402_monk.eaf_&_P12_P2_1402_dizzy.eaf,2690,3.821566,30430,43.230573
1,CCDB,P12_P3_2202_monk.eaf_&_P12_P3_2202_dizzy.eaf,0,0.0,22013,26.927217
2,CCDB,P14_P7_2502_monk.eaf_&_P14_P7_2502_dizzy.eaf,7545,10.778571,13233,18.904286
3,CCDB,P15_P13_2402_monk.eaf_&_P15_P13_2402_dizzy.eaf,5215,7.623043,1130,1.651781
4,CCDB,P16_P6_0903_monk.eaf_&_P16_P6_0903_dizzy.eaf,0,0.0,8430,22.6552
5,CCDB,P18_P10_2102_monk.eaf_&_P18_P10_2102_dizzy.eaf,0,0.0,18140,32.491492
6,CCDB,P18_P1_2102_monk.eaf_&_P18_P1_2102_dizzy.eaf,3345,6.817487,12870,26.230511






### IFADV

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,IFADV,DVB1B.eaf_&_DVA1A.eaf,605,0.503391,25795,21.462745
1,IFADV,DVB2D.eaf_&_DVA2C.eaf,140,0.154338,34095,37.586815
2,IFADV,DVB3F.eaf_&_DVA3E.eaf,0,0.0,3245,65.754813
3,IFADV,DVB4G.eaf_&_DVA4C.eaf,13960,29.835435,8470,18.102159
4,IFADV,DVB5H.eaf_&_DVA5G.eaf,0,0.0,6950,51.749814
5,IFADV,DVB6I.eaf_&_DVA6H.eaf,3695,5.633481,13890,21.177009
6,IFADV,DVB7J.eaf_&_DVA7B.eaf,11070,21.718658,6700,13.144987
7,IFADV,DVB8L.eaf_&_DVA8K.eaf,0,0.0,9960,32.855022






### NDC

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,NDC,13_1_B_F.eaf_&_13_1_A_M.eaf,2880,14.407204,3660,18.309155
1,NDC,13_2_B_F.eaf_&_13_2_A_M.eaf,6800,34.711588,2695,13.757019
2,NDC,13_4_B_F.eaf_&_13_4_A_M.eaf,9020,19.99468,8552,18.957262
3,NDC,14_1_B_F.eaf_&_14_1_A_M.eaf,850,60.283688,0,0.0
4,NDC,14_2_B_F.eaf_&_14_2_A_M.eaf,9835,8.188668,14070,11.71475
5,NDC,17_1_B_F.eaf_&_17_1_A_F.eaf,2875,7.36066,9097,23.290407
6,NDC,17_2_B_F.eaf_&_17_2_A_F.eaf,46800,25.210084,5711,3.076384
7,NDC,17_3_B_F.eaf_&_17_3_A_F.eaf,7990,17.700487,4675,10.356668
8,NDC,17_4_B_F.eaf_&_17_4_A_F.eaf,6940,25.054152,5520,19.927798
9,NDC,18_1_B_M.eaf_&_18_1_A_M.eaf,5610,9.041096,16650,26.833199






### Total duration come from the union of the duration of person A and person B

In [26]:
lstA = {}
lstB = {}
expression_pairs = [("Smiles_0", "Smiles_0"), 
                    ("Smiles_0", "Laughs_0"), 
                    ("Laughs_0", "Laughs_0"), 
                    ("Laughs_0", "Smiles_0"), 
                    ("Role", "Role")]

overlapping_segments_dict = {}

for i, database in enumerate(databases_name):
    if database == databases_pairs[i].replace('_pairs', '').upper():
        databases_list = databases_pair_paths[databases_pairs[i]]
        dataset_dict = {}

        for i in range(0, len(databases_list), 2):
            filepath_A = databases_list[i]
            filepath_B = databases_list[i+1]
            pair_file_A = os.path.basename(filepath_A)
            pair_file_B = os.path.basename(filepath_B)

            if pair_file_A and pair_file_B:
                pair_name = f"{pair_file_B}_&_{pair_file_A}"

            pair_dict = {}
            overlapping_data = {}
            for tier_A, tier_B in expression_pairs:
                lstA_tier = get_tier_from_file(filepath_A, tier_A)
                lstB_tier = get_tier_from_file(filepath_B, tier_B)

                if tier_A in lstA:
                    lstA[tier_A].extend(lstA_tier[tier_A])
                else:
                    lstA[tier_A] = lstA_tier[tier_A]

                if tier_B in lstB:
                    lstB[tier_B].extend(lstB_tier[tier_B])
                else:
                    lstB[tier_B] = lstB_tier[tier_B]
                

                overlapping_segments = get_overlapping_segments(lstB_tier[tier_B], lstA_tier[tier_A])
                overlapping_data[f"{tier_B} vs {tier_A}"] = {'Segments': overlapping_segments}


            dataset_dict[pair_name] = overlapping_data

        overlapping_segments_dict[database] = dataset_dict

dataframes = {}
overlap_segments_set = set()
for database, dataset_dict in overlapping_segments_dict.items():
    overlap_percentage_list = []
    for pair_name, pair_dict in dataset_dict.items():
        overlap_duration_spk_vs_lsn = 0
        overlap_duration_lsn_vs_spk = 0
        percentage_spk_vs_lsn = 0
        percentage_lsn_vs_spk = 0
        duration = 0
        segments = pair_dict["Role vs Role"]["Segments"]
        expression = [("Smiles_0", "Smiles_0"), ("Laughs_0", "Laughs_0"), ("Smiles_0", "Laughs_0"), ("Laughs_0", "Smiles_0")]
        for segmentB, segmentA in segments.items():
            for segA in segmentA:
                segment_key = f"{segA}"
                if segment_key not in overlap_segments_set:
                    overlap_segments_set.add(segment_key)
                    # Check if B is "spk" and A is "lsn"
                    if (segmentB[2].replace(" ", "") == "spk" and segA[2].replace(" ", "") == "lsn"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if a[0] > B[0] and a[1] < B[1]:
                                                    overlap_duration_spk_vs_lsn += a[1] - a[0]
                                                    duration += B[1] - B[0]
                                                elif a[0] < B[0] and a[1] > B[1]:
                                                    overlap_duration_spk_vs_lsn += B[1] - B[0]
                                                    duration += a[1] - a[0]
                                                elif a[0] < B[0] and a[1] < B[1]:
                                                    overlap_duration_spk_vs_lsn += a[1] - B[0]
                                                    duration += (B[1] - a[0]) - (a[1] - B[0])
                                                elif a[0] > B[0] and a[1] > B[1]:
                                                    overlap_duration_spk_vs_lsn += B[1] - a[0]
                                                    duration += (a[1] - B[0]) - (B[1] - a[0])
                    # Check if B is "lsn" and A is "spk"
                    elif (segmentB[2].replace(" ", "") == "lsn" and segA[2].replace(" ", "") == "spk"):
                        for tierB, tierA in expression:
                            segments_tier = pair_dict[f"{tierB} vs {tierA}"]["Segments"]   
                            for B, A in segments_tier.items():
                                if B[0] < segA[1] and B[1] > segA[0]:
                                    for a in A:
                                        if a[0] < segA[1] and a[1] > segA[0]:
                                            tier_key = f"{a}"
                                            if tier_key not in overlap_segments_set:
                                                overlap_segments_set.add(tier_key)
                                                if a[0] > B[0] and a[1] < B[1]:
                                                    overlap_duration_lsn_vs_spk += a[1] - a[0]
                                                    duration += B[1] - B[0]
                                                elif a[0] < B[0] and a[1] > B[1]:
                                                    overlap_duration_lsn_vs_spk += B[1] - B[0]
                                                    duration += a[1] - a[0]
                                                elif a[0] < B[0] and a[1] < B[1]:
                                                    overlap_duration_lsn_vs_spk += a[1] - B[0]
                                                    duration += (B[1] - a[0]) - (a[1] - B[0])
                                                elif a[0] > B[0] and a[1] > B[1]:
                                                    overlap_duration_lsn_vs_spk += B[1] - a[0] 
                                                    duration += (a[1] - B[0]) - (B[1] - a[0])        
                break
        if duration != 0 :
            percentage_spk_vs_lsn = overlap_duration_spk_vs_lsn / (duration) * 100
            percentage_lsn_vs_spk = overlap_duration_lsn_vs_spk / (duration) * 100
        overlap_percentage_list.append({
            'Database': database,
            'Pair': pair_name,
            'Overlap Duration for B spk / A lsn - S&L': overlap_duration_spk_vs_lsn,
            'Overlap Percentage for B spk / A lsn - S&L': percentage_spk_vs_lsn,
            'Overlap Duration for B lsn / A spk - S&L': overlap_duration_lsn_vs_spk,
            'Overlap Percentage for B lsn / A spk - S&L': percentage_lsn_vs_spk,
        })
    df_overlap_percentage = pd.DataFrame(overlap_percentage_list)
    dataframes[database] = df_overlap_percentage

for database, df in dataframes.items():
    display(Markdown(f"### {database}"))
    display(df)
    print("\n")

### CCDB

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,CCDB,P12_P2_1402_monk.eaf_&_P12_P2_1402_dizzy.eaf,2690,2.746019,30430,31.063699
1,CCDB,P12_P3_2202_monk.eaf_&_P12_P3_2202_dizzy.eaf,0,0.0,22013,22.797225
2,CCDB,P14_P7_2502_monk.eaf_&_P14_P7_2502_dizzy.eaf,7545,9.246663,13233,16.217508
3,CCDB,P15_P13_2402_monk.eaf_&_P15_P13_2402_dizzy.eaf,5215,7.23592,1130,1.567898
4,CCDB,P16_P6_0903_monk.eaf_&_P16_P6_0903_dizzy.eaf,0,0.0,8430,24.591599
5,CCDB,P18_P10_2102_monk.eaf_&_P18_P10_2102_dizzy.eaf,0,0.0,18140,30.623787
6,CCDB,P18_P1_2102_monk.eaf_&_P18_P1_2102_dizzy.eaf,3345,5.063962,12870,19.483764






### IFADV

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,IFADV,DVB1B.eaf_&_DVA1A.eaf,605,0.493716,25795,21.050269
1,IFADV,DVB2D.eaf_&_DVA2C.eaf,140,0.130652,34095,31.818394
2,IFADV,DVB3F.eaf_&_DVA3E.eaf,0,0.0,3245,69.263607
3,IFADV,DVB4G.eaf_&_DVA4C.eaf,13960,22.930355,8470,13.912615
4,IFADV,DVB5H.eaf_&_DVA5G.eaf,0,0.0,6950,46.581769
5,IFADV,DVB6I.eaf_&_DVA6H.eaf,3695,5.37064,13890,20.188953
6,IFADV,DVB7J.eaf_&_DVA7B.eaf,11070,19.385343,6700,11.732773
7,IFADV,DVB8L.eaf_&_DVA8K.eaf,0,0.0,9960,27.449359






### NDC

Unnamed: 0,Database,Pair,Overlap Duration for B spk / A lsn - S&L,Overlap Percentage for B spk / A lsn - S&L,Overlap Duration for B lsn / A spk - S&L,Overlap Percentage for B lsn / A spk - S&L
0,NDC,13_1_B_F.eaf_&_13_1_A_M.eaf,2880,15.643672,3660,19.8805
1,NDC,13_2_B_F.eaf_&_13_2_A_M.eaf,6800,23.75048,2695,9.412874
2,NDC,13_4_B_F.eaf_&_13_4_A_M.eaf,9020,18.189518,8552,17.24576
3,NDC,14_1_B_F.eaf_&_14_1_A_M.eaf,850,96.590909,0,0.0
4,NDC,14_2_B_F.eaf_&_14_2_A_M.eaf,9835,7.874615,14070,11.265463
5,NDC,17_1_B_F.eaf_&_17_1_A_F.eaf,2875,5.102494,9097,16.145177
6,NDC,17_2_B_F.eaf_&_17_2_A_F.eaf,46800,24.07779,5711,2.938211
7,NDC,17_3_B_F.eaf_&_17_3_A_F.eaf,7990,16.861876,4675,9.865991
8,NDC,17_4_B_F.eaf_&_17_4_A_F.eaf,6940,20.599584,5520,16.384684
9,NDC,18_1_B_M.eaf_&_18_1_A_M.eaf,5610,7.117031,16650,21.12274






All overlapping functions have been developed and put in the IBPY library in the script: interaction_analysis.py for easier use later.

Here is a list of the functions developed:
- overlap_count(databases_name, databases_pairs, databases_pair_paths, choice, tier="Role")
- overlap_count_SL(databases_name, databases_pairs, databases_pair_paths, expression_pairs, expressions_track, choice)
- overlap_count_SL_advanced(databases_name, databases_pairs, databases_pair_paths, expression_pairs, expressions_track, choice)
- overlap_total_duration_B(databases_name, databases_pairs, databases_pair_paths, expression_pairs, expressions_track, choice)
- overlap_total_duration_A(databases_name, databases_pairs, databases_pair_paths, expression_pairs, expressions_track, choice)
- overlap_total_duration_union(databases_name, databases_pairs, databases_pair_paths, expression_pairs, expressions_track, choice)
- overlap_percentage_B(databases_name, databases_pairs, expression_pairs, databases_pair_paths, expressions_track, choice)
- overlap_percentage_A(databases_name, databases_pairs, expression_pairs, databases_pair_paths, expressions_track, choice)
- overlap_percentage_union(databases_name, databases_pairs, expression_pairs, databases_pair_paths, expressions_track, choice)