Skip to content

Commit

Permalink
added shebang and doc
Browse files Browse the repository at this point in the history
  • Loading branch information
mommi84 committed Jun 9, 2018
1 parent c96d5f2 commit e7887c4
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions build_vocab.py 100644 → 100755
@@ -1,3 +1,7 @@
#!/usr/bin/env python
"""
Usage: python build_vocab.py data.en > vocab.en
"""
import numpy as np
from tensorflow.contrib import learn
import sys
Expand Down Expand Up @@ -44,6 +48,7 @@
## word with id i goes at index i of the list.
# Collect the first element of every entry in sorted_vocab into a set.
# A comprehension is used instead of list(zip(*sorted_vocab))[0] so that an
# empty sorted_vocab yields an empty vocabulary rather than an IndexError.
vocabulary = {entry[0] for entry in sorted_vocab}

# split also by apostrophe
to_remove = set()
to_add = set()
for t0 in vocabulary:
Expand All @@ -56,8 +61,7 @@
# Add every term collected in to_add to the vocabulary in one call.
vocabulary.update(to_add)

# print(vocabulary)
# print(x)
# Print every non-empty vocabulary term to stdout, one per line.
# The call form print(v) with a single argument produces the same output on
# Python 2 and Python 3; the statement form `print v` is a syntax error on 3.
for v in vocabulary:
    if v != "":
        print(v)

0 comments on commit e7887c4

Please sign in to comment.