In [1]:
import xml.etree.ElementTree as ET
import re

In [2]:
tree_words = ET.parse('words/ES2002d.A.words.xml')
root_words = tree_words.getroot()

tree_act = ET.parse('dialogueActs/ES2002d.A.dialog-act.xml')
root_act = tree_act.getroot()

tree_roles = ET.parse('participantRoles/ES2002d.A.role.xml')
root_roles = tree_roles.getroot()

root_act.find('dact').find("child",{"items": "nite"})
nsmap = {'nite':'http://nite.sourceforge.net/'}
root_act.getchildren()[0].find('nite:child',nsmap).attrib['href']
root_words.find('w[@nite:id="ES2002d.A.words1"]',nsmap).attrib

{'{http://nite.sourceforge.net/}id': 'ES2002d.A.words1',
 'starttime': '59.39',
 'endtime': '59.39',
 'punc': 'true'}

In [8]:
def parse_times(string):
    start_re = "\#id\((.*?)\)" 
    stop_re =  "\.\.id\((.*)\)"
    
    start = re.search(start_re,string).groups(0)[0]
    if re.search(stop_re,string):
        stop = re.search(stop_re,string).groups(0)[0]
    else:
        stop = start
    return {'start': start, 'stop': stop}

def get_role(time,root_roles):
    for role in root_roles.getchildren():
        if(role.attrib['endtime'] > time):
            return role.attrib['type']
            

dataset = []
meetings = {}

meeting_ids = ['ES2002d','ES2008b','ES2008d','ES2009d','IS1003d']
participant_ids = ['A','B','C','D']

for meeting_id in meeting_ids:
    meetings[meeting_id] = []
    for participant_id in participant_ids:
        root_act = ET.parse('dialogueActs/{}.{}.dialog-act.xml'.format(meeting_id,participant_id)).getroot()
        root_roles = ET.parse('participantRoles/{}.{}.role.xml'.format(meeting_id,participant_id)).getroot()
        root_words = ET.parse('words/{}.{}.words.xml'.format(meeting_id,participant_id)).getroot()
        for act in root_act.getchildren():
            markers = parse_times(act.find('nite:child',nsmap).attrib['href'])
            starttime = root_words.find('*[@nite:id="{}"]'.format(markers['start']),nsmap).attrib['starttime']
            endtime = root_words.find('*[@nite:id="{}"]'.format(markers['stop']),nsmap).attrib['endtime']
            meetings[meeting_id].append({'participant': participant_id, 'starttime': starttime, 'endtime': endtime, 'role': get_role(endtime,root_roles) })

In [105]:
class Meeting:
    
    def __init__(self,meet):
        meeting = []
        for part in meet:
            meeting.append({"start_time": part['starttime'], "stop_time": part['endtime'], "participant": part['participant'], "role": part['role']})
        self.meeting = meeting
    
    def participations(self,p_id):
        return list(filter(lambda item: item['participant'] == p_id, self.meeting))
    
    def participations_by_role(self,p_id,role):
        return list(filter(lambda item: item['participant'] == p_id and item['role'] == role, self.meeting))
    
    def participations_not_by_role(self,p_id,role):
        return list(filter(lambda item: item['participant'] == p_id and item['role'] != role, self.meeting))
        
    def participations_count(self,p_id):
        return len(self.participations(p_id))
    
    def participation_time(self,p_id):
        time = []
        parts = self.participations(p_id)
        for part in parts:
            time.append(float(part['stop_time']) - float(part['start_time']))
        return time
    
    def protagonic_participation_time(self,p_id):
        time = 0
        parts = self.participations_by_role(p_id,"Protagonist")
        for part in parts:
            time += (float(part['stop_time']) - float(part['start_time']))
        return time
    
    def protagonic_participation_time_prc(self,p_id):
        return 100 * (self.protagonic_participation_time(p_id) / float(self.participations(p_id)[-1]["stop_time"]))
    
    def protagonism_prc(self,p_id):
        return 100 * (len(self.participations_by_role(p_id,"Protagonist")) / len(self.participations(p_id)))
    
    def not_protagonism_prc(self,p_id):
        return 100 * (len(self.participations_not_by_role(p_id,"Protagonist")) / len(self.participations(p_id)))


In [107]:
Meeting(meetings['ES2002d']).meeting

[{'start_time': '59.26',
  'stop_time': '59.39',
  'participant': 'A',
  'role': 'Supporter'},
 {'start_time': '71.75',
  'stop_time': '74.016',
  'participant': 'A',
  'role': 'Neutral'},
 {'start_time': '101.92',
  'stop_time': '106.23',
  'participant': 'A',
  'role': 'Supporter'},
 {'start_time': '106.39',
  'stop_time': '111.34',
  'participant': 'A',
  'role': 'Supporter'},
 {'start_time': '112.76',
  'stop_time': '115.76',
  'participant': 'A',
  'role': 'Supporter'},
 {'start_time': '115.76',
  'stop_time': '116.422',
  'participant': 'A',
  'role': 'Supporter'},
 {'start_time': '118.1',
  'stop_time': '121.31',
  'participant': 'A',
  'role': 'Supporter'},
 {'start_time': '121.31',
  'stop_time': '128.26',
  'participant': 'A',
  'role': 'Supporter'},
 {'start_time': '128.26',
  'stop_time': '128.91',
  'participant': 'A',
  'role': 'Supporter'},
 {'start_time': '128.91',
  'stop_time': '131.63',
  'participant': 'A',
  'role': 'Supporter'},
 {'start_time': '131.63',
  'stop_t

In [22]:
meeting = Meeting(meetings['ES2002d'])
print(meeting.protagonism_prc('B'))
print(meeting.protagonic_participation_time_prc('B'))

43.71134020618557
16.78869238522865


In [None]:
meetings = []
for meeting in meetings:
    meetings.append()

In [34]:
for meeting in meetings:
    print("Meeting: ",meeting)
    for p in ["A","B","C","D"]:
        print("Protagonism of", p, ": ", Meeting(meetings[meeting]).protagonism_prc(p))
    print()
    for p in ["A","B","C","D"]:
        print("Protagonistic Time of", p, ": ", Meeting(meetings[meeting]).protagonic_participation_time_prc(p))
    print()

Meeting:  ES2002d
Protagonism of A :  1.7361111111111112
Protagonism of B :  43.71134020618557
Protagonism of C :  0.0
Protagonism of D :  0.0

Protagonistic Time of A :  0.3571070987551338
Protagonistic Time of B :  16.78869238522865
Protagonistic Time of C :  0.0
Protagonistic Time of D :  0.0

Meeting:  ES2008b
Protagonism of A :  0.974025974025974
Protagonism of B :  2.510460251046025
Protagonism of C :  42.5531914893617
Protagonism of D :  27.27272727272727

Protagonistic Time of A :  0.4813429153320935
Protagonistic Time of B :  0.4314616019818347
Protagonistic Time of C :  12.134250457093197
Protagonistic Time of D :  10.123445710362528

Meeting:  ES2008d
Protagonism of A :  9.297520661157025
Protagonism of B :  0.0
Protagonism of C :  0.0
Protagonism of D :  0.0

Protagonistic Time of A :  3.2078218384623693
Protagonistic Time of B :  0.0
Protagonistic Time of C :  0.0
Protagonistic Time of D :  0.0

Meeting:  ES2009d
Protagonism of A :  11.195928753180661
Protagonism of B :  0

In [117]:
for meeting_id in meeting_ids:
    print('Meeting ID: ',meeting_id)
    print()
    pt = sum(Meeting(meetings[meeting_id]).participation_time('A'))
    pt += sum(Meeting(meetings[meeting_id]).participation_time('B'))
    pt += sum(Meeting(meetings[meeting_id]).participation_time('C'))
    pt += sum(Meeting(meetings[meeting_id]).participation_time('D'))

    ct = float(meetings[meeting_id][-1]['endtime'])*4
    st = ct - pt
    print('Silence total time: ',st)
    print('Silence total prc: ',st/ct)
    print()

Meeting ID:  ES2002d

Silence total time:  6861.495000000008
Silence total prc:  0.7289350730476029

Meeting ID:  ES2008b

Silence total time:  6973.494999999999
Silence total prc:  0.7823046564745052

Meeting ID:  ES2008d

Silence total time:  7867.3859999999995
Silence total prc:  0.7524566739354986

Meeting ID:  ES2009d

Silence total time:  5963.491000000003
Silence total prc:  0.7049377039103507

Meeting ID:  IS1003d

Silence total time:  5514.331000000001
Silence total prc:  0.6678629909358242

