In [1]:
import syft as sy
import torch
import syfertext
from syfertext.tokenizer import Tokenizer
from syfertext.vocab import Vocab
import pickle 
from syft.generic.string import String

# Hook PyTorch with PySyft; this `hook` object is passed to every worker created below.
hook = sy.TorchHook(torch)

# Keep a handle on the hook's local worker (referred to as `me` in the rest of the notebook).
me = hook.local_worker



In [2]:
# Configure the local worker: mark it as not being a client worker and turn its verbose flag off.
# NOTE(review): presumably is_client_worker=False is what allows `me` to keep objects in its
# own `_objects` store (the pipeline states are later pointed to on `me`) — confirm against PySyft.
me.is_client_worker = False
me.verbose = False

In [3]:
# Three virtual workers sharing the same hook, created in the order bob, alice, james.
bob, alice, james = (
    sy.VirtualWorker(hook=hook, id=worker_id, verbose=False)
    for worker_id in ("bob", "alice", "james")
)

## Create Local Pipeline

In [4]:
from syfertext.local_pipeline import get_test_language_model

In [5]:
# Initialize the language model (the test model provided by syfertext.local_pipeline)
nlp = get_test_language_model()

In [5]:
# Build the tokenizer with a single exception entry for the text "token",
# whose two pieces are given by their ORTH values "to" and "ken".
tokenizer = Tokenizer(
    exceptions={
        "token": [
            {"ORTH": "to"},
            {"ORTH": "ken"},
        ]
    }
)

# Build an empty vocab.
vocab = Vocab()

# Attach both to the pipeline, each with the access set {"*"}.
nlp.set_tokenizer(tokenizer, access={"*"})
nlp.set_vocab(vocab, access={"*"})

In [10]:
from syfertext.pipeline import SimpleTagger

In [12]:
# Initialize a fairly extensive list of English stop words, taken from
# https://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list#English
#
# Every entry is lowercase and must be followed by a comma: adjacent string
# literals without a comma are silently concatenated by Python into one entry.

stopwords = ["a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are",
             "aren't", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both",
             "but", "by", "can't", "cannot", "could", "couldn't", "did", "didn't", "do", "does", "doesn't",
             "doing", "don't", "down", "during", "each", "few", "for", "from", "further", "had", "hadn't",
             "has", "hasn't", "have", "haven't", "having", "he", "he'd", "he'll", "he's", "her", "here",
             "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm",
             "i've", "if", "in", "into", "is", "isn't", "it", "it's", "its", "itself", "let's", "me", "more",
             "most", "mustn't", "my", "myself", "no", "nor", "not", "of", "off", "on", "once", "only", "or",
             "other", "ought", "our", "ours", "ourselves", "out", "over", "own", "same", "shan't", "she",
             "she'd", "she'll", "she's", "should", "shouldn't", "so", "some", "such", "than", "that", "that's",
             "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they",
             "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under",
             "until", "up", "very", "was", "wasn't", "we", "we'd", "we'll", "we're", "we've", "were", "weren't",
             "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom",
             "why", "why's", "with", "won't", "would", "wouldn't", "you", "you'd", "you'll", "you're", "you've",
             "your", "yours", "yourself", "yourselves"]

In [13]:
# Configure a SimpleTagger for the `is_stop` attribute, using `stopwords` as its
# lookups, True as the tag, False as the default tag, and case-insensitive matching.
# NOTE(review): presumably tokens found in `lookups` receive `tag` and all others
# receive `default_tag` — confirm against the SimpleTagger documentation.
stop_tagger = SimpleTagger(
    attribute="is_stop",
    lookups=stopwords,
    tag=True,
    default_tag=False,
    case_sensitive=False,
)

In [15]:
# Register the stop-word tagger on the pipeline under the name "stop tagger".
# NOTE(review): the saved output of this cell is
#   AttributeError: 'SimpleTagger' object has no attribute 'set_model_name'
# so in the recorded run the tagger was not added; re-check this call against
# the installed SyferText version before relying on `is_stop`.
nlp.add_pipe(name="stop tagger", component=stop_tagger)

AttributeError: 'SimpleTagger' object has no attribute 'set_model_name'

## Use Local Pipeline

In [6]:
# Build a PySyft String and send it to bob in one step;
# `text` ends up bound to whatever `.send(bob)` returns.
text = String("This is a token").send(bob)

In [7]:
# Display bob's object store to check that the string sent above is now held there.
bob._objects

{93876746765: 'This is a token'}

In [8]:
# Start tokenization: run the pipeline on `text`, which at this point is the
# result of `text.send(bob)` rather than the original local String.
doc = nlp(text)

This
is
a
to
ken


## Deploy Local Pipeline to PyGrid

In [9]:
# Deploy the pipeline to `james`, the VirtualWorker that plays the role of the PyGrid node here.
nlp.deploy(worker=james)

In [10]:
# Display james' object store to see what the deployment registered on it.
james._objects

{'syfertext_test': LanguageModel>None,
 75758711372: [StatePointer | james:75758711372 -> me:syfertext_test:tokenizer],
 'syfertext_test:tokenizer': State>None,
 26632514014: [StatePointer | james:26632514014 -> me:syfertext_test:vocab],
 'syfertext_test:vocab': State>None}

In [11]:
# Delete all objects except on James' machine
for worker in (me, bob, alice):
    # Snapshot the keys first so entries can be removed while iterating.
    for key in list(worker._objects):
        del worker._objects[key]

## Load/Use PyGrid Pipeline

In [12]:
# Search and load from PyGrid, using the model name 'syfertext_test'
# (the same name that appears as a key in james' object store after deployment).
nlp1 = syfertext.load(model_name='syfertext_test')

In [13]:
# Create a PySyft string and send it to Alice.
# `text` is rebound to the new local String; the result of `.send(alice)` is kept
# separately in `text_ptr`, which is what the loaded pipeline is called with next.
text = String("I need this token")
text_ptr = text.send(alice)

In [14]:
# Run the pipeline loaded from james on the string that was sent to alice.
doc = nlp1(text_ptr)

I
need
this
to
ken
