In [19]:
import syft as sy
import torch
import syfertext
from syfertext.tokenizer import Tokenizer
from syfertext.vocab import Vocab
import pickle 
from syft.generic.string import String

hook = sy.TorchHook(torch)

me = hook.local_worker



In [20]:
me.is_client_worker = False
me.verbose = False

In [21]:
bob = sy.VirtualWorker(hook = hook, id = "bob", verbose = False)
alice = sy.VirtualWorker(hook = hook, id = "alice", verbose = False)
james = sy.VirtualWorker(hook = hook, id = "james", verbose = False)

## Create Local Pipeline

In [22]:
nlp = syfertext.create(model_name = "syfertext_sentiment")

In [23]:
# Create the tokenizer
tokenizer = Tokenizer(suffixes = [''], 
                      prefixes = ["("], 
                      infixes = None, 
                      exceptions = {"melo": [{"ORTH":"me"}, {"ORTH":"lo"}]}
                     )

In [24]:
## Prepare vectors to create the vocabulary

# Replace with pathes on your machine
hash2row_path = './../../language_models/syfertext_en_core_web_lg/syfertext_en_core_web_lg/data/key2row'
vectors_path = './../../language_models/syfertext_en_core_web_lg/syfertext_en_core_web_lg/data/vectors'

# Load the key2row dict
with open(hash2row_path, "rb") as hash2row_file:
    hash2row = pickle.load(hash2row_file)
    
# Load the vectors
with open(vectors_path, "rb") as vectors_file:
    vectors = pickle.load(vectors_file)

In [39]:
hash2row

{15748096671724166044: 303,
 17130787002665352158: 197,
 7803463073553212047: 4790,
 2570228442999028682: 2,
 7601031270107461210: 1,
 2333086044707505052: 64,
 17583811671891633967: 13004,
 16386776449441186651: 153473,
 17917450762234962061: 90212,
 2845569251850464646: 4890,
 6441753959142598228: 4589,
 11219611539707024218: 13230,
 6174446835319589640: 15533,
 13726421824593857419: 20011,
 1504634659258364126: 11242,
 16574276769445059825: 223926,
 7229085451700080888: 2103,
 13398808308607858859: 12860,
 10584754210639909977: 684823,
 13533255182131191051: 15807,
 15291897178858464945: 12234,
 5645781836273801443: 74739,
 14842797775714447761: 184394,
 16296342147116913380: 49979,
 13009980616664168188: 146852,
 648685765859229059: 37622,
 1532234788270237555: 138949,
 3160290493673410775: 5778,
 7192950948311231314: 55492,
 898812733540616898: 74660,
 5065724982986493403: 7029,
 12145149956222337371: 11285,
 2577432275152706222: 1305,
 10614879281046276072: 31329,
 11631803523755

In [26]:
vectors.shape

(684831, 300)

In [27]:
vocab = Vocab(hash2row = hash2row, vectors=vectors)

In [28]:
nlp.set_tokenizer(tokenizer, access = {'*'})

In [29]:
nlp.set_vocab(vocab, access = {'*'})

In [30]:
me._objects

{'syfertext_sentiment:tokenizer': State>None,
 'syfertext_sentiment:vocab': State>None}

In [31]:
nlp.pipeline_template

[{'name': 'tokenizer', 'class_name': 'Tokenizer'}]

In [32]:
nlp.states

{'tokenizer': {'state': State>None, 'access': {'*'}},
 'vocab': {'state': State>None, 'access': {'*'}}}

## Use Local Pipeline

In [33]:
text = String("I am (ok-t) he,y $hi St. I'ma melo")

In [16]:
text  = text.send(bob)

In [34]:
bob._objects

{32705728829: "I am (ok-t) he,y $hi St. I'ma melo",
 20668990334: SubPipeline[tokenizer],
 64579122742: [StatePointer | bob:64579122742 -> me:syfertext_sentiment:vocab],
 'syfertext_sentiment:vocab': State>None,
 87214040163: [StatePointer | bob:87214040163 -> me:syfertext_sentiment:tokenizer],
 'syfertext_sentiment:tokenizer': State>None,
 41998025104: Doc>None}

In [35]:
doc = nlp(text)

I [ 1.8733e-01  4.0595e-01 -5.1174e-01 -5.5482e-01  3.9716e-02  1.2887e-01
  4.5137e-01 -5.9149e-01  1.5591e-01  1.5137e+00 -8.7020e-01  5.0672e-02
  1.5211e-01 -1.9183e-01  1.1181e-01  1.2131e-01 -2.7212e-01  1.6203e+00
 -2.4884e-01  1.4060e-01  3.3099e-01 -1.8061e-02  1.5244e-01 -2.6943e-01
 -2.7833e-01 -5.2123e-02 -4.8149e-01 -5.1839e-01  8.6262e-02  3.0818e-02
 -2.1253e-01 -1.1378e-01 -2.2384e-01  1.8262e-01 -3.4541e-01  8.2611e-02
  1.0024e-01 -7.9550e-02 -8.1721e-01  6.5621e-03  8.0134e-02 -3.9976e-01
 -6.3131e-02  3.2260e-01 -3.1625e-02  4.3056e-01 -2.7270e-01 -7.6020e-02
  1.0293e-01 -8.8653e-02 -2.9087e-01 -4.7214e-02  4.6036e-02 -1.7788e-02
  6.4990e-02  8.8451e-02 -3.1574e-01 -5.8522e-01  2.2295e-01 -5.2785e-02
 -5.5981e-01 -3.9580e-01 -7.9849e-02 -1.0933e-02 -4.1722e-02 -5.5576e-01
  8.8707e-02  1.3710e-01 -2.9873e-03 -2.6256e-02  7.7330e-02  3.9199e-01
  3.4507e-01 -8.0130e-02  3.3451e-01  2.7063e-01 -2.4544e-02  7.2576e-02
 -1.8120e-01  2.3693e-01  3.9977e-01  4.5012e-01 

In [38]:
for token in doc:
    print(token.text, token.vector)

I [ 1.8733e-01  4.0595e-01 -5.1174e-01 -5.5482e-01  3.9716e-02  1.2887e-01
  4.5137e-01 -5.9149e-01  1.5591e-01  1.5137e+00 -8.7020e-01  5.0672e-02
  1.5211e-01 -1.9183e-01  1.1181e-01  1.2131e-01 -2.7212e-01  1.6203e+00
 -2.4884e-01  1.4060e-01  3.3099e-01 -1.8061e-02  1.5244e-01 -2.6943e-01
 -2.7833e-01 -5.2123e-02 -4.8149e-01 -5.1839e-01  8.6262e-02  3.0818e-02
 -2.1253e-01 -1.1378e-01 -2.2384e-01  1.8262e-01 -3.4541e-01  8.2611e-02
  1.0024e-01 -7.9550e-02 -8.1721e-01  6.5621e-03  8.0134e-02 -3.9976e-01
 -6.3131e-02  3.2260e-01 -3.1625e-02  4.3056e-01 -2.7270e-01 -7.6020e-02
  1.0293e-01 -8.8653e-02 -2.9087e-01 -4.7214e-02  4.6036e-02 -1.7788e-02
  6.4990e-02  8.8451e-02 -3.1574e-01 -5.8522e-01  2.2295e-01 -5.2785e-02
 -5.5981e-01 -3.9580e-01 -7.9849e-02 -1.0933e-02 -4.1722e-02 -5.5576e-01
  8.8707e-02  1.3710e-01 -2.9873e-03 -2.6256e-02  7.7330e-02  3.9199e-01
  3.4507e-01 -8.0130e-02  3.3451e-01  2.7063e-01 -2.4544e-02  7.2576e-02
 -1.8120e-01  2.3693e-01  3.9977e-01  4.5012e-01 

## Deploy Local Pipeline to PyGrid

In [17]:
nlp.deploy(worker=james)

In [18]:
james._objects

{'syfertext_sentiment': LanguageModel>None,
 52728930622: [StatePointer | james:52728930622 -> me:syfertext_sentiment:tokenizer],
 'syfertext_sentiment:tokenizer': State>None,
 47401034694: [StatePointer | james:47401034694 -> me:syfertext_sentiment:vocab],
 'syfertext_sentiment:vocab': State>None}

In [19]:
bob._objects

{2303266988: "I am (ok-t) he,y $hi St. I'ma melo",
 79638890028: SubPipeline[tokenizer],
 42101469372: [StatePointer | bob:42101469372 -> me:syfertext_sentiment:vocab],
 'syfertext_sentiment:vocab': State>None,
 64031195580: [StatePointer | bob:64031195580 -> me:syfertext_sentiment:tokenizer],
 'syfertext_sentiment:tokenizer': State>None,
 3512105756: Doc>None}

In [19]:
me._objects

{'syfertext_sentiment:tokenizer': State>None,
 'syfertext_sentiment:vocab': State>None}

In [20]:
for key in list(me._objects):
    del me._objects[key]
    
for key in list(bob._objects):
    del bob._objects[key]
    
for key in list(alice._objects):
    del alice._objects[key]

In [21]:
bob._objects

{}

In [22]:
me._objects

{}

In [23]:
james._objects

{'syfertext_sentiment': LanguageModel>None,
 49343262017: [StatePointer | james:49343262017 -> me:syfertext_sentiment:tokenizer],
 'syfertext_sentiment:tokenizer': State>None,
 9769539393: [StatePointer | james:9769539393 -> me:syfertext_sentiment:vocab],
 'syfertext_sentiment:vocab': State>None}

## Load/Use PyGrid Pipeline

In [20]:
nlp1 = syfertext.load(model_name='syfertext_sentiment')

In [22]:
me._objects

{'syfertext_sentiment:tokenizer': State>None,
 'syfertext_sentiment:vocab': State>None,
 45084684011: [LanguageModelPointer | me:45084684011 -> james:syfertext_sentiment],
 'syfertext_sentiment': LanguageModel>None}

In [23]:
nlp1.pipeline_template

[{'name': 'tokenizer', 'class_name': 'Tokenizer'}]

In [24]:
text = String("I am (ok-t) he,y $hi St. I'ma melo")
text_ptr = text.send(alice)

In [25]:
doc = nlp1(text_ptr)

I
am
(
ok-t)
he,y
$hi
St.
I'ma
me
lo


In [26]:
alice._objects

{55029703255: "I am (ok-t) he,y $hi St. I'ma melo",
 77258055077: SubPipeline[tokenizer],
 7274297445: [StatePointer | alice:7274297445 -> me:syfertext_sentiment:vocab],
 'syfertext_sentiment:vocab': State>None,
 66530135748: [StatePointer | alice:66530135748 -> me:syfertext_sentiment:tokenizer],
 'syfertext_sentiment:tokenizer': State>None,
 28616311982: Doc>None}

In [30]:
bob._objects

{}