Skip to content

Commit

Permalink
debug
Browse files Browse the repository at this point in the history
  • Loading branch information
danny911kr committed Jul 23, 2019
1 parent f0f244c commit c2dc4b6
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
16 changes: 15 additions & 1 deletion annotation/AlpacaTag/server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,22 @@
from django.contrib.auth.models import User
from django.contrib.staticfiles.storage import staticfiles_storage
from .utils import get_key_choices

import spacy
# NOTE(review): the same model is loaded again further down, after
# WhitespaceTokenizer is defined -- confirm whether this first load is needed.
nlp = spacy.load('en_core_web_sm')
from spacy.tokens import Doc

class WhitespaceTokenizer(object):
    """Tokenizer that splits text on single space characters only.

    Instances are callable: given a string they return a ``Doc`` whose tokens
    are the space-separated pieces of that string, so pre-tokenized input
    keeps its word boundaries.
    """

    def __init__(self, vocab):
        """Remember the vocabulary used to construct ``Doc`` objects.

        Args:
            vocab: Vocabulary object handed to ``Doc`` for every text.
        """
        self.vocab = vocab

    def __call__(self, text):
        """Split *text* on ``' '`` and wrap the pieces in a ``Doc``.

        Args:
            text: The string to tokenize.

        Returns:
            A ``Doc`` built from the space-separated words. The words and
            their trailing-space flags are arranged so that the document text
            reproduces *text* exactly (no extra trailing space).
        """
        words = text.split(' ')
        # By default every token 'owns' the space character that follows it.
        spaces = [True] * len(words)

        # Leading, trailing or consecutive spaces (and empty input) make
        # split() yield '' entries. Zero-length tokens are not valid, so each
        # one becomes an explicit ' ' token that owns no further space.
        for index, word in enumerate(words):
            if word == '':
                words[index] = ' '
                spaces[index] = False

        # split() always returns at least one element, so [-1] is safe here.
        if words[-1] == ' ':
            # The text ended with a space (or was empty): that final space is
            # already owned by the previous token, so drop the placeholder.
            del words[-1]
            del spaces[-1]
        else:
            # The last real word is not followed by a space in the input.
            spaces[-1] = False

        return Doc(self.vocab, words=words, spaces=spaces)

# Load the 'en_core_web_sm' model and replace its tokenizer with the
# whitespace-only one, so texts are split purely on single spaces.
nlp = spacy.load("en_core_web_sm")
nlp.tokenizer = WhitespaceTokenizer(nlp.vocab)


class Project(models.Model):
Expand Down
1 change: 1 addition & 0 deletions annotation/AlpacaTag/server/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import spacy
from spacy.tokens import Doc

class WhitespaceTokenizer(object):
def __init__(self, vocab):
self.vocab = vocab
Expand Down

0 comments on commit c2dc4b6

Please sign in to comment.