In [55]:
import os
import sys
import time
import codecs
import roots
# Author : Aghilas SINI
# This code is inspired by the source code of the TextGrid.py class

class Roots2Merlin(object):
    """Load a Roots corpus file and run context extraction on each utterance.

    The instance only stores the path of the Roots file; the corpus itself is
    opened on demand by ``load_roots_file``.
    """

    def __init__(self,roots_file_name):
        """Remember the path of the Roots corpus file to process."""
        self.roots_file_name=roots_file_name

    def __str__(self):
        return self.roots_file_name

    def __repr__(self):
        return self.roots_file_name

    def processing(self,utts):
        """Extract the segment context of every utterance, then release it.

        Parameters
        ----------
        utts : iterable of utterances, or None
            Typically the value returned by ``load_roots_file``. ``None``
            (what ``load_roots_file`` returns when the file is missing) is
            accepted and treated as "nothing to do".
        """
        if utts is None:
            # The loader already reported the problem; iterating None would
            # only add an unrelated TypeError on top of it.
            return
        for utt in utts:
            try:
                UttByUtt(utt).get_segment_context()
            finally:
                # Release the utterance even when extraction fails, so a bad
                # utterance does not leak the ones processed so far.
                utt.destroy()

    def load_roots_file(self):
        """Open the corpus file and return all of its utterances.

        Returns
        -------
        The result of ``corpus.get_utterances(0, corpus.count_utterances())``,
        or ``None`` (after printing a message) when the file is not found.
        """
        try:
            corpus=roots.Corpus()
            corpus.load(self.roots_file_name)
            nbutts=corpus.count_utterances()
            return corpus.get_utterances(0,nbutts)
        except FileNotFoundError:
            print("The file {} doesn't exist, please check the path!".format(self.roots_file_name))
            return None
            

    
            

            
            
            
class UttByUtt(object):
    """Compute per-segment linguistic context for a single Roots utterance.

    The short tags in the comments (p1..p7, a1..a3, b1..b16) name the label
    fields each value is intended for. The placeholder ``'x'`` is used
    wherever a value is not available.
    """

    def __init__(self,utt,list_sequences_name=None):
        """Cache the time segments and syllables of ``utt``.

        Parameters
        ----------
        utt : utterance object
            Must provide the 'Time Segment JTrans' and 'Syllable' sequences.
        list_sequences_name : iterable of str, optional
            Sequence names that ``check_out`` validates; defaults to none.
        """
        # Both attributes are read by check_out().
        self.utt=utt
        self.list_sequences_name=list(list_sequences_name) if list_sequences_name is not None else []
        self.segments=utt.get_sequence('Time Segment JTrans').as_segment_sequence().get_all_items()
        self.syllables=utt.get_sequence('Syllable').as_syllable_sequence().get_all_items()

    # beg end
    def get_time_segment(self,segment):
        """Return ``[start, end]`` of a time segment."""
        time_segment=segment.as_acoustic_TimeSegment()
        return [time_segment.get_segment_start(),time_segment.get_segment_end()]

    # p1^p2-p3+p4=p5
    def get_quinphon(self, iseg):
        """Return the phoneme names around segment ``iseg``.

        The result is ``[left-left, left, current, right, right-right]``;
        neighbours that fall outside the utterance are ``'x'``. ``iseg`` is
        expected to be a valid non-negative index into ``self.segments``.
        """
        # Look the centre up first so an out-of-range index still raises.
        c_segment_name=self.get_phoneme_name(self.segments[iseg])
        nb_segments=len(self.segments)

        def name_at(index):
            if 0<=index<nb_segments:
                return self.get_phoneme_name(self.segments[index])
            return 'x'

        #by default it in sampa (for now)
        return [name_at(iseg-2),name_at(iseg-1),c_segment_name,name_at(iseg+1),name_at(iseg+2)]

    def _get_phone(self,seg):
        """Return the first 'Phone JTrans' item related to ``seg``, or None."""
        phones_segment=seg.get_related_items('Phone JTrans')
        if len(phones_segment)>0:
            return phones_segment[0]
        return None

    @staticmethod
    def _index_of(item):
        """Return ``item.get_in_sequence_index()``, or None if unsupported."""
        getter=getattr(item,'get_in_sequence_index',None)
        if getter is None:
            return None
        return getter()

    def get_phoneme_name(self,seg):
        """Return the name of the phone related to ``seg`` ("sil" if none)."""
        phone=self._get_phone(seg)
        if phone is None:
            return "sil"
        return phone.to_string()

    def get_position(self,item,sub_item_name,sub_item_seq_name,sub_item_index=None):
        """Locate a sub-item inside the ``sub_item_seq_name`` items of ``item``.

        Parameters
        ----------
        item : item whose related items are searched.
        sub_item_name : str
            Display string of the sub-item; used for matching when no index
            is given (or when the candidates expose no in-sequence index).
        sub_item_seq_name : str
            Name of the sequence the sub-items belong to.
        sub_item_index : int, optional
            In-sequence index of the sub-item. Matching on it avoids picking
            the first of several sub-items that share the same name.

        Returns
        -------
        ``[forward, backward]`` 1-based positions, or ``None`` if not found.
        """
        item_subseq=item.get_related_items(sub_item_seq_name)
        item_length=len(item_subseq)
        for isub_item,sub_item in enumerate(item_subseq):
            candidate_index=None
            if sub_item_index is not None:
                candidate_index=self._index_of(sub_item)
            if candidate_index is not None:
                matched=candidate_index==sub_item_index
            else:
                matched=sub_item.to_string()==sub_item_name
            if matched:
                return [isub_item+1,item_length-isub_item]
        return None

    #p6-p7

    #/A:a1_a2_a3/B:b1-b2-b3
    def get_syllable_structure(self,syllable):
        """Return the number of 'Phone JTrans' items related to ``syllable``."""
        return len(syllable.get_related_items('Phone JTrans'))

    #@b4-b5&b6-b7
    def get_syllable_postion(self,syllable):
        """Return the position of ``syllable`` in its word and breath group.

        The result is ``[word_fwd, word_bwd, phrase_fwd, phrase_bwd]``: the
        word positions come from the related 'Word JTrans' item and the
        phrase positions from the related 'Breath Group' item. A pair is
        ``['x', 'x']`` when the container or the syllable cannot be found.
        """
        syl_name=syllable.to_string()
        syl_index=self._index_of(syllable)
        positions=[]
        for container_seq_name in ('Word JTrans','Breath Group'):
            containers=syllable.get_related_items(container_seq_name)
            position=None
            if len(containers)>0:
                position=self.get_position(containers[0],syl_name,'Syllable',syl_index)
            if position is None:
                position=['x','x']
            positions.extend(position)
        return positions

    #|b16
    def get_syl_last_phone(self,syllable):
        """Return the name of the last phone related to ``syllable``."""
        return syllable.get_related_items('Phone JTrans')[-1].to_string()

    def get_segment_context(self):
        """Compute the context of every time segment of the utterance.

        Returns
        -------
        list of dict, one per segment, with the keys ``phone_context``
        (quinphone), ``seg_position`` (phone position in its syllable),
        ``prev_syl_struct``, ``cur_syl_struct``, ``cur_syl_pos`` and
        ``cur_last_phone``. Syllable-level values are ``'x'`` for segments
        that are not related to any syllable.
        """
        contexts=[]
        prev_syl_struct='x'
        for iseg, segment in enumerate(self.segments):
            c_phone_context=self.get_quinphon(iseg)

            syllables=segment.get_related_items('Syllable')
            if len(syllables)>0:
                icur_syl=syllables[0].get_in_sequence_index()
                cur_syl=self.syllables[icur_syl]
                # Identify the phone by its index so that a phoneme occurring
                # twice in the syllable gets its own position.
                phone=self._get_phone(segment)
                phone_index=self._index_of(phone) if phone is not None else None
                segpostion=self.get_position(cur_syl,c_phone_context[2],'Phone JTrans',phone_index)
                if segpostion is None:
                    segpostion='x'
                if icur_syl!=0:
                    prev_syl_struct=self.get_syllable_structure(self.syllables[icur_syl-1])
                else:
                    prev_syl_struct="x"
                cur_syl_struct=self.get_syllable_structure(cur_syl)
                cur_syl_pos=self.get_syllable_postion(cur_syl)
                cur_last_phone=self.get_syl_last_phone(cur_syl)
            else:
                segpostion='x'
                cur_syl_struct='x'
                cur_syl_pos='x'
                cur_last_phone='x'
            contexts.append({
                'phone_context':c_phone_context,
                'seg_position':segpostion,
                'prev_syl_struct':prev_syl_struct,
                'cur_syl_struct':cur_syl_struct,
                'cur_syl_pos':cur_syl_pos,
                'cur_last_phone':cur_last_phone,
            })
            # Carried over for a following segment that has no syllable.
            prev_syl_struct=cur_syl_struct
        return contexts

    def check_out(self):
        """Return, for each requested sequence name, whether ``utt`` has it."""
        return [self.utt.is_valid_sequence(nseq) for nseq in self.list_sequences_name]
    
    

In [56]:
# Path of the Roots corpus file to convert; adjust it to the local setup.
ROOTS_FILE='/home/aghilas/Workspace/Corpora/Audiobooks/fr/Speakers/Nadine/utts/flaubert_madamebovary_01_syl.json'

roots2merlin=Roots2Merlin(ROOTS_FILE)
utts=roots2merlin.load_roots_file()
# load_roots_file() returns None (after printing a message) when the file is
# missing; skip processing in that case instead of failing on the iteration.
if utts is not None:
    roots2merlin.processing(utts)

[1, 6]
[2, 5]
[3, 4]
[4, 3]
[5, 2]
[2, 5]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 5]
[2, 4]
[3, 3]
[4, 2]
[1, 5]
[1, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]

[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[1, 3]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]

[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 4]
[2, 3]
[3, 2]
[1, 4]
[1, 2]
[2, 1]
[1, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[1, 4]
[4, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]

[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[1, 3]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]

[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]

[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]

[3, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]

[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[4, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 4]
[2, 3]
[3, 2]
[2, 3]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 2]
[2, 1]
[1, 5]
[2, 4]
[3, 3]
[4, 2]
[5, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 1]
[1, 3]
[2, 2]
[3, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 2]
[2, 1]
[1, 3]
[2, 2]
[3, 1]