In [1]:
import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")

text = "Elon Musk bought Twitter for 44 billion dollars."
doc = nlp(text)

print(f"{'Text':<15} {'Dep (‡∏´‡∏ô‡πâ‡∏≤‡∏ó‡∏µ‡πà)':<12} {'Head (‡∏Ñ‡∏≥‡∏´‡∏±‡∏ß‡∏´‡∏ô‡πâ‡∏≤)':<15} {'Children (‡∏Ñ‡∏≥‡∏•‡∏π‡∏Å‡∏ô‡πâ‡∏≠‡∏á)'}")
print("-" * 60)

for token in doc:
    print(f"{token.text:<15} {token.dep_:<12} {token.head.text:<15} {[child.text for child in token.children]}")

displacy.render(doc, style='dep', jupyter=True)

Text            Dep (‡∏´‡∏ô‡πâ‡∏≤‡∏ó‡∏µ‡πà) Head (‡∏Ñ‡∏≥‡∏´‡∏±‡∏ß‡∏´‡∏ô‡πâ‡∏≤) Children (‡∏Ñ‡∏≥‡∏•‡∏π‡∏Å‡∏ô‡πâ‡∏≠‡∏á)
------------------------------------------------------------
Elon            compound     Musk            []
Musk            nsubj        bought          ['Elon']
bought          ROOT         bought          ['Musk', 'Twitter', 'for', '.']
Twitter         dobj         bought          []
for             prep         bought          ['dollars']
44              compound     billion         []
billion         nummod       dollars         ['44']
dollars         pobj         for             ['billion']
.               punct        bought          []


In [7]:
import spacy
from pythainlp.tokenize import word_tokenize
class ThaiTokenizer:
    def __init__(self, vocab):
        self.vocab = vocab

    def __call__(self, text):
        words = word_tokenize(text) # ‡πÉ‡∏ä‡πâ PyThaiNLP ‡∏ï‡∏±‡∏î‡∏Ñ‡∏≥
        return spacy.tokens.Doc(self.vocab, words=words)

nlp = spacy.blank("th")
nlp.tokenizer = ThaiTokenizer(nlp.vocab)

doc = nlp("‡πÅ‡∏°‡∏ß‡∏™‡∏µ‡∏î‡∏≥‡∏Å‡∏¥‡∏ô‡∏õ‡∏•‡∏≤‡∏ó‡∏π")
print([token.text for token in doc])

['‡πÅ‡∏°‡∏ß', '‡∏™‡∏µ', '‡∏î‡∏≥', '‡∏Å‡∏¥‡∏ô', '‡∏õ‡∏•‡∏≤‡∏ó‡∏π']


In [None]:
import torch
import spacy_stanza
from spacy import displacy

# --- üõ†Ô∏è ‡πÅ‡∏Å‡πâ‡πÑ‡∏Ç‡∏õ‡∏±‡∏ç‡∏´‡∏≤ PyTorch 2.6+ ‡πÇ‡∏´‡∏•‡∏î‡πÇ‡∏°‡πÄ‡∏î‡∏• Stanza ‡πÑ‡∏°‡πà‡πÑ‡∏î‡πâ ---
# ‡πÄ‡∏£‡∏≤‡∏à‡∏∞‡∏ö‡∏±‡∏á‡∏Ñ‡∏±‡∏ö‡πÉ‡∏´‡πâ torch.load ‡∏ó‡∏≥‡∏á‡∏≤‡∏ô‡πÅ‡∏ö‡∏ö‡πÄ‡∏Å‡πà‡∏≤ (weights_only=False)
_original_load = torch.load

def strict_load_patch(*args, **kwargs):
    if 'weights_only' not in kwargs:
        kwargs['weights_only'] = False
    return _original_load(*args, **kwargs)

torch.load = strict_load_patch
# --------------------------------------------------------

nlp = spacy_stanza.load_pipeline("th")

text = "‡πÅ‡∏°‡∏ß‡∏™‡∏µ‡∏î‡∏≥‡∏Å‡∏¥‡∏ô‡∏õ‡∏•‡∏≤‡∏ó‡∏π‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏≠‡∏£‡πà‡∏≠‡∏¢"
doc = nlp(text)

print(f"{'‡∏Ñ‡∏≥':<15} {'‡∏£‡∏≤‡∏Å‡∏Ñ‡∏≥':<15} {'POS':<10} {'Dep':<15} {'Head'}")
print("-" * 70)

for token in doc:
    head_text = token.head.text if token.head != token else "ROOT"
    print(f"{token.text:<15} {token.lemma_:<15} {token.pos_:<10} {token.dep_:<15} {head_text}")

displacy.render(doc, style="dep", options={"compact": True, "distance": 100}, jupyter=True)

2025-12-14 18:41:20 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES
Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json: 367kB [00:00, 9.73MB/s]                    
2025-12-14 18:41:20 INFO: Loading these models for language: th (Thai):
| Processor | Package |
-----------------------
| tokenize  | orchid  |
| ner       | lst20   |

2025-12-14 18:41:20 INFO: Using device: cpu
2025-12-14 18:41:20 INFO: Loading: tokenize
2025-12-14 18:41:21 INFO: Loading: ner
2025-12-14 18:41:23 INFO: Done loading processors!


‡∏Ñ‡∏≥              ‡∏£‡∏≤‡∏Å‡∏Ñ‡∏≥           POS        Dep             Head
----------------------------------------------------------------------
‡πÅ‡∏°‡∏ß                                                        ROOT
‡∏™‡∏µ‡∏î‡∏≥                                                       ROOT
‡∏Å‡∏¥‡∏ô                                                        ROOT
‡∏õ‡∏•‡∏≤‡∏ó‡∏π                                                      ROOT
‡∏≠‡∏¢‡πà‡∏≤‡∏á                                                      ROOT
‡∏≠‡∏£‡πà‡∏≠‡∏¢                                                      ROOT


https://spacy.io/usage/models


In [None]:
def extract_action(text):
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(text)
    
    buyer = None
    item = None
    
    for token in doc:
        if token.dep_ == "ROOT": # ‡πÄ‡∏à‡∏≠‡∏Å‡∏£‡∏¥‡∏¢‡∏≤‡∏´‡∏•‡∏±‡∏Å‡πÅ‡∏•‡πâ‡∏ß (‡πÄ‡∏ä‡πà‡∏ô bought)
            action = token.text
            
            for child in token.children:
                if child.dep_ == "nsubj":
                    buyer = child.text
                if child.dep_ == "dobj":
                    item = child.text
                    
            return f"Action: {action} | Who: {buyer} | What: {item}"

news1 = "Apple acquired a small AI startup."
news2 = "Microsoft sells its gaming division."

print(extract_action(news1)) 

print(extract_action(news2))

Action: acquired | Who: Apple | What: startup
Action: sells | Who: Microsoft | What: division
