# -*- coding: utf-8 -*-
"""
Created on Thu Nov 12 11:19:27 2015

@author: yuyuan

Annotate text files with the pyltp pipeline.

Every entry of ``path`` is read line by line. Each line is passed through the
segmentor, POS tagger, parser, named entity recognizer and semantic role
labeller, and the results are appended to a ``.srl`` file in ``outputpath``
named after the first three characters of the input file name.
"""
import os
import codecs

from pyltp import Segmentor, Postagger, Parser, NamedEntityRecognizer, SementicRoleLabeller

# Set your own model path
MODELDIR = "/Workingspace/ltp-master/ltp_data"

# Set the working directories for input and output
outputpath = '/Workingspace/machinetranslations_annotated'
path = '/Workingspace/machinetranslations_segmented/segmented'

# Read the file names into a list. os.listdir() returns entries in arbitrary
# order; sorting makes the run reproducible, which matters because inputs
# sharing the same three-character prefix are appended to the same output.
filenames = sorted(os.listdir(path))

# Make sure the output directory exists before the first result is written.
if not os.path.isdir(outputpath):
    os.makedirs(outputpath)

# Load the models for word segmentor, POS tagger, parser, NER and SRL.
# Loading segmenter model
segmentor = Segmentor()
segmentor.load(os.path.join(MODELDIR, "cws.model"))

# Loading POS tagger model
postagger = Postagger()
postagger.load(os.path.join(MODELDIR, "pos.model"))

# Loading parser model
parser = Parser()
parser.load(os.path.join(MODELDIR, "parser.model"))

# Loading NER model
recognizer = NamedEntityRecognizer()
recognizer.load(os.path.join(MODELDIR, "ner.model"))

# Loading SRL model
labeller = SementicRoleLabeller()
labeller.load(os.path.join(MODELDIR, "srl/"))

# The processing is wrapped in try/finally so the models are released even
# when reading, analysing or writing fails part-way through.
try:
    # Iterate over the directory to process the files one by one
    for filename in filenames:
        # Print out which file is being processed
        print('Readling file %s:' % (filename))

        source = os.path.join(path, filename)
        # Output name: first three characters of the input name + '.srl'
        target = os.path.join(outputpath, filename[0:3] + '.srl')

        with codecs.open(source, 'r', 'utf-8', errors='ignore') as translation:
            # Open the output once per input file (append mode) instead of
            # reopening it for every single line.
            with codecs.open(target, 'a', 'utf-8') as result:
                # Iterate over the file line by line
                for line in translation:
                    # Process the line sequentially; each step consumes the
                    # results of the previous ones.
                    # NOTE(review): the line is passed with its trailing
                    # newline, exactly as read - confirm the segmentor
                    # tolerates it.
                    words = segmentor.segment(line)
                    postags = postagger.postag(words)
                    arcs = parser.parse(words, postags)
                    netags = recognizer.recognize(words, postags)
                    roles = labeller.label(words, postags, netags, arcs)

                    # Write the result to the output file
                    print('Writing result to a file...')
                    # One tab-separated row each for words, POS tags,
                    # "head:relation" arcs and NE tags.
                    result.write('\t'.join(words) + '\n')
                    result.write('\t'.join(postags) + '\n')
                    result.write(
                        "\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs) + '\n'
                    )
                    result.write("\t".join(netags) + '\n')
                    # One row per role: its arguments as "name:(start,end)",
                    # concatenated without a separator.
                    for role in roles:
                        result.write(
                            "".join(
                                [
                                    "%s:(%d,%d)" % (arg.name, arg.range.start, arg.range.end)
                                    for arg in role.arguments
                                ]
                            )
                            + '\n'
                        )
finally:
    # Clear the loaded models from memory
    segmentor.release()
    postagger.release()
    parser.release()
    recognizer.release()
    labeller.release()