In [2]:
import xml.etree.ElementTree as ET
import re

In [13]:
def parse_times(string):
    """Extract the first and last element ids referenced by an href string.

    The string is expected to contain ``#id(<start>)``, optionally followed
    by ``..id(<stop>)``. When the ``..id(...)`` part is absent the reference
    covers a single element, so ``stop`` is set equal to ``start``.

    Args:
        string: The href text to parse.

    Returns:
        dict: ``{'start': <id>, 'stop': <id>}`` with both ids as strings.

    Raises:
        ValueError: If ``string`` contains no ``#id(...)`` part.
    """
    # Raw strings: "\#", "\(" and "\." are not valid Python string escapes,
    # so the non-raw form triggers an invalid-escape warning on newer Pythons.
    start_re = r"\#id\((.*?)\)"
    stop_re = r"\.\.id\((.*)\)"

    start_match = re.search(start_re, string)
    if start_match is None:
        raise ValueError("no '#id(...)' reference found in {!r}".format(string))
    start = start_match.group(1)

    # Search once and reuse the match instead of running the regex twice.
    stop_match = re.search(stop_re, string)
    stop = stop_match.group(1) if stop_match else start
    return {'start': start, 'stop': stop}

def get_role(time,root_roles):
    """Return the ``type`` of the first role whose ``endtime`` is after ``time``.

    Args:
        time: Point in time, given as a number or a numeric string.
        root_roles: Element whose direct children carry ``endtime`` and
            ``type`` attributes.

    Returns:
        The ``type`` attribute of the first child (in document order) whose
        ``endtime`` is greater than ``time``, or ``None`` if there is none.
    """
    moment = float(time)
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for role in root_roles:
        # XML attributes are strings; compare numerically, because string
        # comparison would order "9.5" after "10.2".
        if float(role.attrib['endtime']) > moment:
            return role.attrib['type']
    return None
            

# Not populated anywhere in this cell.
dataset = []
# meeting id -> list of {'participant', 'starttime', 'endtime'} dicts, one per
# dialogue act, appended participant by participant in participant_ids order.
meetings = {}

meeting_ids = ['ES2002d','ES2003b','ES2003d','ES2004b','ES2004d','ES2005b','ES2005d','ES2006b','ES2006d','ES2007b','ES2007d']
participant_ids = ['A','B','C','D']
nsmap = {'nite':'http://nite.sourceforge.net/'}
# Fully qualified name under which ElementTree stores the nite:id attribute.
nite_id_attr = '{{{}}}id'.format(nsmap['nite'])

for meeting_id in meeting_ids:
    meetings[meeting_id] = []
    for participant_id in participant_ids:
        root_act = ET.parse('dialogueActs/{}.{}.dialog-act.xml'.format(meeting_id,participant_id)).getroot()
        root_words = ET.parse('words/{}.{}.words.xml'.format(meeting_id,participant_id)).getroot()

        # Index the direct children of the words file by nite:id once, rather
        # than scanning every word for each dialogue act. setdefault keeps the
        # first element per id, which is the element find() would return.
        words_by_id = {}
        for word in root_words:
            words_by_id.setdefault(word.get(nite_id_attr), word)

        # Iterate the element directly: Element.getchildren() was removed in
        # Python 3.9.
        for act in root_act:
            markers = parse_times(act.find('nite:child',nsmap).attrib['href'])
            # An act starts when its first word starts and ends when its last
            # word ends; a missing id raises KeyError naming that id.
            starttime = words_by_id[markers['start']].attrib['starttime']
            endtime = words_by_id[markers['stop']].attrib['endtime']
            meetings[meeting_id].append({'participant': participant_id, 'starttime': starttime, 'endtime': endtime })

In [8]:
class Meeting:
    """Per-participant statistics over the speech segments of one meeting.

    Segments are kept in ``self.meeting`` as dicts with the keys
    ``start_time``, ``stop_time`` and ``participant``, plus ``role`` when the
    input segment provided one.
    """

    def __init__(self,meet):
        """Copy the segments of ``meet`` into the internal representation.

        Args:
            meet: Iterable of dicts with 'starttime', 'endtime' and
                'participant' keys and, optionally, a 'role' key.
        """
        meeting = []
        for part in meet:
            entry = {"start_time": part['starttime'], "stop_time": part['endtime'], "participant": part['participant']}
            # Keep the role when the input has one: the role-based queries
            # below depend on it and failed with KeyError when it was dropped.
            if 'role' in part:
                entry['role'] = part['role']
            meeting.append(entry)
        self.meeting = meeting

    @staticmethod
    def _duration(part):
        """Return the length of one segment (stop minus start) as a float."""
        return float(part['stop_time']) - float(part['start_time'])

    def participations(self,p_id):
        """Return the segments of participant ``p_id``, in stored order."""
        return [item for item in self.meeting if item['participant'] == p_id]

    def participations_by_role(self,p_id,role):
        """Return the segments of ``p_id`` whose role equals ``role``.

        Segments without a role never match.
        """
        return [item for item in self.meeting
                if item['participant'] == p_id and item.get('role') == role]

    def participations_not_by_role(self,p_id,role):
        """Return the segments of ``p_id`` whose role differs from ``role``.

        Segments without a role are included, since their role is not ``role``.
        """
        return [item for item in self.meeting
                if item['participant'] == p_id and item.get('role') != role]

    def participations_count(self,p_id):
        """Return how many segments participant ``p_id`` has."""
        return len(self.participations(p_id))

    def participation_time(self,p_id):
        """Return a list with the duration of each segment of ``p_id``."""
        return [self._duration(part) for part in self.participations(p_id)]

    def protagonic_participation_time(self,p_id):
        """Return the summed duration of the "Protagonist" segments of ``p_id``."""
        return sum(self._duration(part)
                   for part in self.participations_by_role(p_id,"Protagonist"))

    def protagonic_participation_time_prc(self,p_id):
        """Return "Protagonist" time as a percentage of the participant's span.

        The denominator is the stop time of the participant's last stored
        segment. Returns 0.0 when the participant has no segments.
        """
        parts = self.participations(p_id)
        if not parts:
            return 0.0
        # NOTE(review): assumes the last stored segment is also the latest in
        # time - confirm the input ordering.
        return 100 * (self.protagonic_participation_time(p_id) / float(parts[-1]["stop_time"]))

    def protagonism_prc(self,p_id):
        """Return the percentage of ``p_id`` segments with role "Protagonist".

        Returns 0.0 when the participant has no segments.
        """
        total = len(self.participations(p_id))
        if not total:
            return 0.0
        return 100 * (len(self.participations_by_role(p_id,"Protagonist")) / total)

    def not_protagonism_prc(self,p_id):
        """Return the percentage of ``p_id`` segments without role "Protagonist".

        Returns 0.0 when the participant has no segments.
        """
        total = len(self.participations(p_id))
        if not total:
            return 0.0
        return 100 * (len(self.participations_not_by_role(p_id,"Protagonist")) / total)


In [14]:
# For each meeting, print how much of the total channel time (meeting length
# times number of participants) is not covered by any dialogue act.
for meeting_id in meeting_ids:
    print('Meeting ID: ',meeting_id)
    print()
    # Build the Meeting once and reuse it for every participant.
    meeting = Meeting(meetings[meeting_id])
    # Total speaking time summed over all participants.
    pt = sum(sum(meeting.participation_time(p_id)) for p_id in participant_ids)

    # NOTE(review): the meeting length is taken from the end time of the last
    # appended act, i.e. the final act of the last participant loaded, which
    # may be earlier than the latest end time in the meeting - confirm.
    ct = float(meetings[meeting_id][-1]['endtime'])*len(participant_ids)
    st = ct - pt
    print('Silence total time: ',st)
    # Printed as a fraction of ct (0-1), not multiplied by 100.
    print('Silence total prc: ',st/ct)
    print()

Meeting ID:  ES2002d

Silence total time:  6861.495000000008
Silence total prc:  0.7289350730476029

Meeting ID:  ES2003b

Silence total time:  6240.877999999999
Silence total prc:  0.754972926415012

Meeting ID:  ES2003d

Silence total time:  6369.466999999999
Silence total prc:  0.7303684162728231

Meeting ID:  ES2004b

Silence total time:  6998.8600000000015
Silence total prc:  0.7519220108380357

Meeting ID:  ES2004d

Silence total time:  6646.342000000001
Silence total prc:  0.7503852215814408

Meeting ID:  ES2005b

Silence total time:  6711.475
Silence total prc:  0.7317894261676618

Meeting ID:  ES2005d

Silence total time:  4972.249000000002
Silence total prc:  0.7480800941221785

Meeting ID:  ES2006b

Silence total time:  6447.777000000002
Silence total prc:  0.7395053813263847

Meeting ID:  ES2006d

Silence total time:  5522.894
Silence total prc:  0.702068746345308

Meeting ID:  ES2007b

Silence total time:  5019.136000000001
Silence total prc:  0.7583746834524985

Meeting I