Skip to content

Commit

Permalink
First load
Browse files Browse the repository at this point in the history
  • Loading branch information
PonteIneptique committed Dec 12, 2017
1 parent 3700777 commit 0bdbd1f
Show file tree
Hide file tree
Showing 11 changed files with 2,406 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dist/
downloads/
eggs/
.eggs/
.idea/
lib/
lib64/
parts/
Expand Down
16 changes: 16 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
language: python
python:
- "3.4.5"
- "3.5"
- "3.6"

# command to install dependencies
install:
- pip install -r requirements.txt
- pip install coveralls

# command to run tests
script:
- coverage run --source=pyperseus_treebank setup.py test
after_success:
- coveralls
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
# pyperseus-treebank
# pyperseus-treebank

[![Coverage Status](https://coveralls.io/repos/github/PonteIneptique/pyperseus-treebank/badge.svg?branch=master)](https://coveralls.io/github/PonteIneptique/pyperseus-treebank?branch=master)
[![Build Status](https://travis-ci.org/PonteIneptique/pyperseus-treebank.svg?branch=master)](https://travis-ci.org/PonteIneptique/pyperseus-treebank)
Empty file added pyperseus_treebank/__init__.py
Empty file.
21 changes: 21 additions & 0 deletions pyperseus_treebank/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
class Token:
    """Generic treebank token.

    Stores the positional and lexical attributes of one word and delegates
    the decoding of its morphological tag to :meth:`parse_features`, which
    language-specific subclasses are expected to override.
    """

    def __init__(self, index, form, lemma, parent=0, features="", pos=None, rel="ROOT"):
        """ Build a token and decode its features.

        :param index: Identifier of the token
        :param form: Form of the token
        :param lemma: Lemma of the token
        :param parent: Identifier of the head of the token
        :param features: Raw morphological tag, handed to ``parse_features``
        :param pos: Part of speech
        :param rel: Relation of the token to its head
        """
        self.index, self.form, self.lemma = index, form, lemma
        self.parent = parent
        self.pos, self.rel = pos, rel
        # Decoded last: every other attribute is already set when the
        # (overridden) parser runs.
        parsed = self.parse_features(features)
        self.features = parsed

    def parse_features(self, features):
        """ Decode a raw morphological tag.

        The base class provides no decoding and always raises.

        :param features: A string containing morphological information
        :type features: str
        :return: Parsed features
        :rtype: dict
        :raises NotImplementedError: always, in the base class
        """
        message = "Parse Features has not been implemented for this class"
        raise NotImplementedError(message)
66 changes: 66 additions & 0 deletions pyperseus_treebank/latin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""
..source :: https://github.com/perseids-project/perseids_treebanking/blob/ae0305138dacc4a89c5fe6b0f086a4b3b1efdc92/transformations/aldt-util.xsl
"""
from .base import Token


# Lookup tables translating the one-letter codes of a positional postag
# into feature labels. Each table serves one slot of the tag; "-" (slot not
# filled) is deliberately absent from all of them.
_NUMBER = {
    "s": "Sing",
    "p": "Plur",
}
_TENSE = {
    "p": "Pres",
    "f": "Fut",
    "r": "Perf",
    "l": "PQP",
    "i": "Imp",
}
_MOOD = {
    "i": "Ind",
    "s": "Sub",
    "m": "Imp",
    "g": "Ger",
    "p": "Part",
    "u": "Sup",
    "n": "Inf",
}
_VOICE = {
    "a": "Act",
    "p": "Pass",
    "d": "Dep",
}
_GENDER = {
    "f": "Fem",
    "m": "Masc",
    "n": "Neut",
    "c": "Com",
}
_CASE = {
    "g": "Gen",
    "d": "Dat",
    "a": "Acc",
    "v": "Voc",
    "n": "Nom",
    "b": "Abl",
    "i": "Ins",
    "l": "Loc",
}
_DEGREE = {
    "p": "Pos",
    "c": "Comp",
    "s": "Sup",
}


class LatinToken(Token):
    """Token whose features are decoded from a Perseus Latin positional postag.

    The postag is read as a fixed-width string: character 0 is the part of
    speech and characters 1 to 8 each carry one feature, ``-`` meaning that
    the feature is not set.
    """

    # Number of characters in a complete postag.
    _POSTAG_LENGTH = 9

    # (index in the postag, feature name, code -> label table).
    # A table of ``None`` means the raw character is stored unchanged.
    _FEATURE_SLOTS = (
        (1, "Person", None),
        (2, "Number", _NUMBER),
        (3, "Tense", _TENSE),
        (4, "Mood", _MOOD),
        (5, "Voice", _VOICE),
        (6, "Gender", _GENDER),
        (7, "Case", _CASE),
        (8, "Degree", _DEGREE),
    )

    def parse_features(self, features):
        """ Parse features from the POSTAG of Perseus Latin XML

        Besides returning the features, this sets ``self.pos`` to the first
        character of the postag. An empty postag yields no features and
        leaves ``self.pos`` untouched; a postag shorter than nine characters
        is treated as if its missing trailing slots were ``-``.

        .. example :: self.parse_features("n-p---na-")

        :param features: A string containing morphological information
        :type features: str
        :return: Parsed features
        :rtype: dict
        :raises KeyError: if a slot holds a code missing from its table
        """
        feats = {}

        # Nothing to decode (this is the default value passed by
        # Token.__init__); indexing the string would raise IndexError.
        if not features:
            return feats

        # Pad truncated tags so that every slot can be indexed safely.
        postag = features.ljust(self._POSTAG_LENGTH, "-")

        self.pos = postag[0]

        for position, name, labels in self._FEATURE_SLOTS:
            code = postag[position]
            if code == "-":
                continue
            if labels is None:
                feats[name] = code
                continue
            try:
                feats[name] = labels[code]
            except KeyError:
                # Same exception type as a bare lookup, but say which slot
                # of which tag could not be decoded.
                raise KeyError(
                    "Unknown {} code {!r} at position {} of postag {!r}".format(
                        name, code, position, features
                    )
                ) from None

        return feats
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
lxml
21 changes: 21 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from setuptools import setup, find_packages

# Version published for the package.
version = "0.0.1"

setup(
    name='pyperseus_treebank',
    version=version,
    # Ship every package found except the test suite.
    packages=find_packages(exclude=["tests"]),
    url='https://github.com/ponteineptique/pyperseus-treebank',
    license='GNU GPL v2',
    author='Thibault Clerice',
    author_email='leponteineptique@gmail.com',
    description='Perseus XML Treebank Parser',
    # Package run by ``python setup.py test``.
    test_suite="tests",
    install_requires=[
        "lxml"
    ],
    # ``tests_require`` is the keyword setuptools recognises; the previous
    # ``test_requires`` spelling was ignored as an unknown option.
    tests_require=[
        "coverage==4.4.1"
    ]
)
Empty file added tests/__init__.py
Empty file.
2,256 changes: 2,256 additions & 0 deletions tests/test_data/tb.latin.xml

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions tests/test_latin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from pyperseus_treebank.latin import LatinToken
from unittest import TestCase


class TestLatinToken(TestCase):
    """Checks the decoding of Perseus Latin postags by LatinToken."""

    def test_features(self):
        """ A finite verb postag is decoded into part of speech and features

        Source : <word id="4" form="cano" lemma="cano" postag="v1spia---" relation="PRED" head="0"/>
        :return:
        """
        # Fixture mirrors the source word above, including its relation.
        cano = LatinToken(4, "cano", "cano", 0, "v1spia---", rel="PRED")

        # Attributes stored as given by the constructor.
        self.assertEqual(cano.index, 4)
        self.assertEqual(cano.form, "cano")
        self.assertEqual(cano.lemma, "cano")
        self.assertEqual(cano.parent, 0)
        self.assertEqual(cano.rel, "PRED")

        # Attributes derived from the postag; slots holding "-" are absent.
        self.assertEqual(cano.pos, "v")
        self.assertEqual(cano.features, {
            "Person": "1",
            "Number": "Sing",
            "Tense": "Pres",
            "Mood": "Ind",
            "Voice": "Act"
        })

0 comments on commit 0bdbd1f

Please sign in to comment.