From cd9715226d7c02c757fe4eeaf8999e112e2ad6b8 Mon Sep 17 00:00:00 2001 From: grammarware Date: Fri, 10 Jul 2009 16:22:57 +0000 Subject: [PATCH] python code refactored; a small tool added (more to come) git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@661 ab42f6e0-554d-0410-b580-99e487e6eeb2 --- shared/python/diffShort.py | 16 +++ shared/python/lci.py | 189 +-------------------------------- shared/python/lciConfig.py | 211 +++++++++++++++++++++++++++++++++++++ shared/tools/gdtm | 22 ++++ shared/xsl/txl2bgf.xslt | 47 ++++++--- 5 files changed, 282 insertions(+), 203 deletions(-) create mode 100755 shared/python/diffShort.py create mode 100755 shared/python/lciConfig.py create mode 100755 shared/tools/gdtm diff --git a/shared/python/diffShort.py b/shared/python/diffShort.py new file mode 100755 index 00000000..04831411 --- /dev/null +++ b/shared/python/diffShort.py @@ -0,0 +1,16 @@ +#!/usr/bin/python +import os +import sys +import string +import metrics + +if __name__ == "__main__": + if len(sys.argv) == 4: + pnm,psm = metrics.mismatches(sys.argv[3],sys.argv[1],sys.argv[2]) + print 'Mismatches:',pnm,'+',psm,'=',pnm+psm + else: + print '''This is the shortest possible version of Grammar Diff Tool. + +Usage:''' + print ' ',sys.argv[0],'','','' + sys.exit(1) diff --git a/shared/python/lci.py b/shared/python/lci.py index 38f6e935..79606bdc 100755 --- a/shared/python/lci.py +++ b/shared/python/lci.py @@ -4,117 +4,15 @@ import sys import glob from elementtree import ElementTree - -def stripSpecifics(lbl): - l = lbl[:] - if l.find('-')>0: - l = l.split('-')[0] - return l - -def stripCamelCase(lbl): - l='' - for x in lbl: - if x.islower() or x=='.': - l+=x - else: - break - return l - -class Chain: - def __init__(self,*arr): - self.array=[] - for a in arr: - self.array.append(a) - def __call__(self): - return '.'.join(self.array) - def __repr__(self): - return '-'+'.'.join(self.array)+'-' - # singular items are strings, slices are Chains - def __getitem__(self,key): - if type(key)==type(slice(0,1,None)): - return ChainFromArray(self.array[key]) - else: - return self.array[key] - def __len__(self): - return len(self.array) - def __eq__(self,other): - if type(other)==type(''): - return self.array==other.split('.') - else: - return self.array==other.array - def dotNodeName(self,target): - name = self.array[0] - for a in self.array[1:]: - name += '_'+stripSpecifics(a) - return name+'_'+target - def bgfFileName(self): - name = [self.array[0]] - for a in self.array[1:]: - if name[-1].isdigit(): - if name[-2]==stripCamelCase(a): - name[-1] = str(int(name[-1])+1) - else: - name.append(stripCamelCase(a)) - elif name[-1]==stripCamelCase(a): - name.append('2') - else: - name.append(stripCamelCase(a)) - return '.'.join(name)+'.bgf' - def futureBgfFileName(self,next): - tmp = self.array[:] - self.array.append(stripCamelCase(next)) - name = self.bgfFileName() - self.array = tmp - return name - def append(self,step): - self.array.append(step) - def spaceNotation(self): - tmp = self.array[:] - tmp.reverse() - return ' '.join(tmp) - def type(self): - t = 0 - for a in self.array[1:]: - if ttype[a] in ('preparation','nominal-matching','normalizing'): - t = -1 - if ttype[a] in ('structural-matching','extension','correction','relaxation'): - t = 1 - return t - def lastAction(self): - return self.array[-1] - -def ChainFromArray(a): - x = Chain() - x.array = a[:] - return x +from lciConfig import * # A global flag, if set, LCI will exit with a non-zero status problem = False # output streams redirected to null shutup = ' 1> /dev/null 2> /dev/null' -# transformation type per action: preparation, nominal-matching, normalizing, structural-matching, -# extension, correction, relaxation -ttype = {} -orderedsrc = [] -derived = {} -shortcuts = {} -actions = [] -autoactions = {} -testsets = {} -tester = {} -extractor = {} -treeExtractor = {} -treeEvaluator = {} -targets = {} -parser = {} -evaluator = {} -testset = [] graphBig = [] graphSmall = [] log = None -tools = {} -treeTools = {} -automethods = {} almostFailed = [] failed = [] @@ -126,90 +24,6 @@ def sysexit(n): log.close() sys.exit(n) -def readConfiguration (cfg): - config = ElementTree.parse(cfg) - # shortcuts - for xmlnode in config.findall('//shortcut'): - shortcuts[xmlnode.findtext('name')]=expandxml(xmlnode.findall('expansion')[0],{}) - # actions - for xmlnode in config.findall('//target/branch/*/perform'): - if xmlnode.text not in actions: - actions.append(xmlnode.text) - # automated actions - for xmlnode in config.findall('//target/branch/*/automated'): - if xmlnode.findtext('result') not in actions: - actions.append(xmlnode.findtext('result')) - autoactions[xmlnode.findtext('result')]=xmlnode.findtext('method') - # testset - for xmlnode in config.findall('//testset'): - testsets[xmlnode.findtext('name')]=expandxml(xmlnode.findall('command')[0],{}) - # sources - for xmlnode in config.findall('//source'): - orderedsrc.append(xmlnode.findtext('name')) - if xmlnode.findall('derived'): - derived[xmlnode.findtext('name')]=(xmlnode.findtext('derived/from'),xmlnode.findtext('derived/using')) - extractor[xmlnode.findtext('name')]=expandxml(xmlnode.findall('grammar/extraction')[0],{}) - if xmlnode.findall('grammar/parsing'): - parser[xmlnode.findtext('name')]=expandxml(xmlnode.findall('grammar/parsing')[0],{}) - if xmlnode.findall('grammar/evaluation'): - evaluator[xmlnode.findtext('name')]=expandxml(xmlnode.findall('grammar/evaluation')[0],{}) - if xmlnode.findall('tree/extraction'): - treeExtractor[xmlnode.findtext('name')]=expandxml(xmlnode.findall('tree/extraction')[0],{}) - if xmlnode.findall('tree/evaluation'): - treeEvaluator[xmlnode.findtext('name')]=expandxml(xmlnode.findall('tree/evaluation')[0],{}) - tmp = [] - for set in xmlnode.findall('testing/set'): - tmp.append(set.text) - tester[xmlnode.findtext('name')]=tmp[:] - # targets - for xmlnode in config.findall('//target'): - name = xmlnode.findtext('name') - targets[name]= [] - for br in xmlnode.findall('branch'): - for phase in br.findall('*'): - if phase.tag == 'input': - branch = Chain(br.findtext('input')) - else: - for p in phase.findall('*'): - if p.tag == 'perform': - branch.append(p.text) - ttype[p.text] = phase.tag - elif p.tag == 'automated': - branch.append(p.findtext('result')) - ttype[p.findtext('result')] = phase.tag - else: - print '[WARN] Unknown tag skipped:',p.tag - targets[name].append(branch) - # tools - for xmlnode in config.findall('//tool'): - tools[xmlnode.findtext('name')] = expandxml(xmlnode.findall('grammar')[0],{}) - if xmlnode.findall('tree'): - treeTools[xmlnode.findtext('name')] = expandxml(xmlnode.findall('tree')[0],{}) - # methods - for xmlnode in config.findall('//generator'): - automethods[xmlnode.findtext('name')] = expandxml(xmlnode.findall('command')[0],{}) - print 'Read', - if shortcuts: - print len(shortcuts),'shortcuts,', - if tools or treeTools: - print `len(tools)`+'+'+`len(treeTools)`,'tools,', - if actions: - if autoactions: - print len(actions),'actions ('+`len(autoactions)`,'automated),', - else: - print len(actions),'actions,', - if automethods: - print len(automethods),'generators,', - if targets: - print len(targets),'targets,', - if testsets: - print len(testsets),'test sets,', - if extractor: - print len(extractor),'sources,', - if parser or evaluator: - print len(parser),'parsers &',len(evaluator),'evaluators,', - print 'LCF is fine.' - def expandone(tag,text,rep): if text: wte = text @@ -716,6 +530,7 @@ def convergeTestSets(): diffBTFs(t) final = orderTargets()[-1] for evaluator in treeEvaluator.keys(): + #wtf? pass def runTestSets(): diff --git a/shared/python/lciConfig.py b/shared/python/lciConfig.py new file mode 100755 index 00000000..0af50d2a --- /dev/null +++ b/shared/python/lciConfig.py @@ -0,0 +1,211 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +from elementtree import ElementTree + +# transformation type per action: preparation, nominal-matching, normalizing, structural-matching, +# extension, correction, relaxation +ttype = {} +orderedsrc = [] +derived = {} +shortcuts = {} +actions = [] +autoactions = {} +testsets = {} +tester = {} +extractor = {} +treeExtractor = {} +treeEvaluator = {} +targets = {} +parser = {} +evaluator = {} +testset = [] +tools = {} +treeTools = {} +automethods = {} + +class Chain: + def __init__(self,*arr): + self.array=[] + for a in arr: + self.array.append(a) + def __call__(self): + return '.'.join(self.array) + def __repr__(self): + return '-'+'.'.join(self.array)+'-' + # singular items are strings, slices are Chains + def __getitem__(self,key): + if type(key)==type(slice(0,1,None)): + return ChainFromArray(self.array[key]) + else: + return self.array[key] + def __len__(self): + return len(self.array) + def __eq__(self,other): + if type(other)==type(''): + return self.array==other.split('.') + else: + return self.array==other.array + def dotNodeName(self,target): + name = self.array[0] + for a in self.array[1:]: + name += '_'+stripSpecifics(a) + return name+'_'+target + def bgfFileName(self): + name = [self.array[0]] + for a in self.array[1:]: + if name[-1].isdigit(): + if name[-2]==stripCamelCase(a): + name[-1] = str(int(name[-1])+1) + else: + name.append(stripCamelCase(a)) + elif name[-1]==stripCamelCase(a): + name.append('2') + else: + name.append(stripCamelCase(a)) + return '.'.join(name)+'.bgf' + def futureBgfFileName(self,next): + tmp = self.array[:] + self.array.append(stripCamelCase(next)) + name = self.bgfFileName() + self.array = tmp + return name + def append(self,step): + self.array.append(step) + def spaceNotation(self): + tmp = self.array[:] + tmp.reverse() + return ' '.join(tmp) + def type(self): + t = 0 + for a in self.array[1:]: + if ttype[a] in ('preparation','nominal-matching','normalizing'): + t = -1 + if ttype[a] in ('structural-matching','extension','correction','relaxation'): + t = 1 + return t + def lastAction(self): + return self.array[-1] + +def ChainFromArray(a): + x = Chain() + x.array = a[:] + return x + +def readConfiguration (cfg): + config = ElementTree.parse(cfg) + # shortcuts + for xmlnode in config.findall('//shortcut'): + shortcuts[xmlnode.findtext('name')]=expandxml(xmlnode.findall('expansion')[0],{}) + # actions + for xmlnode in config.findall('//target/branch/*/perform'): + if xmlnode.text not in actions: + actions.append(xmlnode.text) + # automated actions + for xmlnode in config.findall('//target/branch/*/automated'): + if xmlnode.findtext('result') not in actions: + actions.append(xmlnode.findtext('result')) + autoactions[xmlnode.findtext('result')]=xmlnode.findtext('method') + # testset + for xmlnode in config.findall('//testset'): + testsets[xmlnode.findtext('name')]=expandxml(xmlnode.findall('command')[0],{}) + # sources + for xmlnode in config.findall('//source'): + orderedsrc.append(xmlnode.findtext('name')) + if xmlnode.findall('derived'): + derived[xmlnode.findtext('name')]=(xmlnode.findtext('derived/from'),xmlnode.findtext('derived/using')) + extractor[xmlnode.findtext('name')]=expandxml(xmlnode.findall('grammar/extraction')[0],{}) + if xmlnode.findall('grammar/parsing'): + parser[xmlnode.findtext('name')]=expandxml(xmlnode.findall('grammar/parsing')[0],{}) + if xmlnode.findall('grammar/evaluation'): + evaluator[xmlnode.findtext('name')]=expandxml(xmlnode.findall('grammar/evaluation')[0],{}) + if xmlnode.findall('tree/extraction'): + treeExtractor[xmlnode.findtext('name')]=expandxml(xmlnode.findall('tree/extraction')[0],{}) + if xmlnode.findall('tree/evaluation'): + treeEvaluator[xmlnode.findtext('name')]=expandxml(xmlnode.findall('tree/evaluation')[0],{}) + tmp = [] + for theset in xmlnode.findall('testing/set'): + tmp.append(theset.text) + tester[xmlnode.findtext('name')]=tmp[:] + # targets + for xmlnode in config.findall('//target'): + name = xmlnode.findtext('name') + targets[name]= [] + for br in xmlnode.findall('branch'): + for phase in br.findall('*'): + if phase.tag == 'input': + branch = Chain(br.findtext('input')) + else: + for p in phase.findall('*'): + if p.tag == 'perform': + branch.append(p.text) + ttype[p.text] = phase.tag + elif p.tag == 'automated': + branch.append(p.findtext('result')) + ttype[p.findtext('result')] = phase.tag + else: + print '[WARN] Unknown tag skipped:',p.tag + targets[name].append(branch) + # tools + for xmlnode in config.findall('//tool'): + tools[xmlnode.findtext('name')] = expandxml(xmlnode.findall('grammar')[0],{}) + if xmlnode.findall('tree'): + treeTools[xmlnode.findtext('name')] = expandxml(xmlnode.findall('tree')[0],{}) + # methods + for xmlnode in config.findall('//generator'): + automethods[xmlnode.findtext('name')] = expandxml(xmlnode.findall('command')[0],{}) + print 'Read', + if shortcuts: + print len(shortcuts),'shortcuts,', + if tools or treeTools: + print `len(tools)`+'+'+`len(treeTools)`,'tools,', + if actions: + if autoactions: + print len(actions),'actions ('+`len(autoactions)`,'automated),', + else: + print len(actions),'actions,', + if automethods: + print len(automethods),'generators,', + if targets: + print len(targets),'targets,', + if testsets: + print len(testsets),'test sets,', + if extractor: + print len(extractor),'sources,', + if parser or evaluator: + print len(parser),'parsers &',len(evaluator),'evaluators,', + print 'LCF is fine.' + +def expandone(tag,text,rep): + if text: + wte = text + else: + wte = tag.replace('expand-','') + if shortcuts.has_key(wte): + return shortcuts[wte] + elif rep.has_key(wte): + return rep[wte] + else: + # postpone expanding + return '%'+wte+'%' + +def expandxml(mixed,rep): + s = mixed.text + for tag in mixed.getchildren(): + s += expandone(tag.tag,tag.text,rep) + s += tag.tail + return s.strip() + +def stripSpecifics(lbl): + l = lbl[:] + if l.find('-')>0: + l = l.split('-')[0] + return l + +def stripCamelCase(lbl): + l='' + for x in lbl: + if x.islower() or x=='.': + l+=x + else: + break + return l diff --git a/shared/tools/gdtm b/shared/tools/gdtm new file mode 100755 index 00000000..e491e0fd --- /dev/null +++ b/shared/tools/gdtm @@ -0,0 +1,22 @@ +#!/bin/sh + +# Get our hands on basedir +LOCAL1=${PWD} +cd `dirname $0` +cd .. +LOCAL2=${PWD} +cd ${LOCAL1} + +if [ $# -ne 2 ]; then + echo "This is the shortest version of Grammar Diff Tool." + echo "Usage: gdtm " + exit 1 +else + for i in $*; do + if [ ! -r $i ]; then + echo "Oops: $i not found or not readable." + exit 1 + fi + done + python ${LOCAL2}/python/diffShort.py $1 $2 ${LOCAL2}/tools/gdt +fi diff --git a/shared/xsl/txl2bgf.xslt b/shared/xsl/txl2bgf.xslt index 5aa791f1..9aab296d 100644 --- a/shared/xsl/txl2bgf.xslt +++ b/shared/xsl/txl2bgf.xslt @@ -42,18 +42,18 @@ - - - - - - + + + + + + - - - + + + @@ -62,25 +62,40 @@ - - - + + + + + - - - + + + + + + + + + + + + + + + + - +