# Implementation Example of [`stanza_batch`](https://github.com/apmoore1/stanza-batch) module


Install via:
```bash
pip install stanza-batch
```

In [1]:
import stanza

from typing import List
from stanza.models.common.doc import Document
from stanza_batch import batch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Raw input texts. Note that document_1 contains a blank line in the middle.
document_1 = 'Hello how are you\n\nI am good thank you'
document_2 = 'This is a different document'

# Baseline without stanza_batch: merge both texts into a single string,
# separated by a blank line, and run the pipeline once over the merged text.
batch_document = '\n\n'.join((document_1, document_2))

# NOTE: the English model only needs to be downloaded once.
# Uncomment the next line on the first run:
# stanza.download("en")

# Tokenize-only English pipeline, bound to `nlp`.
nlp = stanza.Pipeline(processors="tokenize", lang="en")
stanza_batch = nlp(batch_document)

# Text the merged document is expected to carry after processing.
stanza_document = 'Hello how are you\n\nI am good thank you\n\nThis is a different document'

# Checks on the merged result: it is one Document, its text matches the
# expected merged text, and it holds three sentences in total.
print('stanza_batch is type Document?', isinstance(stanza_batch, Document))
print('text in stanza_batch is same as in stanza_document?', stanza_batch.text == stanza_document)
print('3 sentences after processing?', len(stanza_batch.sentences) == 3)

2022-07-14 13:49:54 INFO: Loading these models for language: en (English):
| Processor | Package  |
------------------------
| tokenize  | combined |

2022-07-14 13:49:54 INFO: Use device: cpu
2022-07-14 13:49:54 INFO: Loading: tokenize
2022-07-14 13:49:54 INFO: Done loading processors!


stanza_batch is type Document? True
text in stanza_batch is same as in stanza_document? True
3 sentences after processing? True


In [3]:

# Using Stanza Batch: each input text comes back as its own Document.
raw_documents = (document_1, document_2)
expected_sentence_counts = (2, 1)

# batch_size=32 is passed explicitly here; it is also the default batch size.
stanza_documents: List[Document] = list(batch(list(raw_documents), nlp, batch_size=32))

# 2 Documents are expected after processing.
print('2 documents after processing?  ', len(stanza_documents) == 2)

# Every processed Document should carry exactly the raw text it was built from.
print('Each document contains the same raw text after processing?:')
for position, raw_text in enumerate(raw_documents):
    print(stanza_documents[position].text == raw_text)

# Sentence counts are checked per document, in input order.
print('Each document contains the expected number of sentences?:')
for position, expected_count in enumerate(expected_sentence_counts):
    print(len(stanza_documents[position].sentences) == expected_count)

2 documents after processing?   True
Each document contains the same raw text after processing?:
True
True
Each document contains the expected number of sentences?:
True
True
