**Crear entorno virtual en consola:**

_python -m venv venv_

**Activar entorno virtual en consola:**

_.\venv\Scripts\activate.ps1_

**Desactivar entorno virtual en consola:**

_deactivate_

**Instalar librerías necesarias:**

Utilizando CPU:
_pip install spacy_

## Instalación

In [None]:
# https://spacy.io/usage

# pip install spacy
# python -m spacy download es_core_news_sm

### Comprobar GPU y versión de CUDA (si está disponible)

In [34]:
import subprocess

# Only the first characters of the `nvidia-smi` report are shown; the rest of
# the (long) table is cut off.
SMI_PREVIEW_CHARS = 980

# Both commands are run through the shell; their combined stdout/stderr text is
# captured as a string, so a missing tool shows up as the shell's error message.
smi_report = subprocess.getoutput("nvidia-smi")
nvcc_report = subprocess.getoutput("nvcc --version")

print("NVIDIA Graphics Card Driver: ", smi_report[:SMI_PREVIEW_CHARS])
print("CUDA version: ", nvcc_report, "\n")

NVIDIA Graphics Card Driver:  Fri Jun 24 01:59:10 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 512.59       Driver Version: 512.59       CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0  On |                  N/A |
| N/A   52C    P8    10W /  N/A |   1178MiB /  8192MiB |     31%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                    
CUDA version:  nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 20

### Compatibilidad con TensorFlow

In [None]:
# import subprocess
# import tensorflow as tf

# def get_cuda_info():
#     print("NVIDIA Graphics Card Driver: ", subprocess.getoutput("nvidia-smi")[:980])
#     print("CUDA version: ", subprocess.getoutput("nvcc --version"), "\n")

# def get_tf_info():
#     print(subprocess.getoutput("pip show tensorflow"), "\n")

# def check_compatibility():
#     print(tf.config.list_physical_devices('GPU'))

### Compatibilidad con PyTorch

In [None]:
# import subprocess
# import torch

# def get_cuda_info():
#     print("NVIDIA Graphics Card Driver: ", subprocess.getoutput("nvidia-smi")[:980])
#     print("CUDA version: ", subprocess.getoutput("nvcc --version"), "\n")

# def get_torch_info():
#     print(subprocess.getoutput("pip show torch"), "\n")

# def get_device(show_info = True):
#     if torch.cuda.is_available():    
#         device = torch.device("cuda")

#         if show_info:
#             print('There are %d GPU(s) available.' % torch.cuda.device_count())
#             print('We will use the GPU:', torch.cuda.get_device_name(0))

#     else:
#         device = torch.device("cpu")

#         if show_info:
#             print('No GPU available, using the CPU instead.')

#     return device

## spaCy Pipeline

### Introducción

In [2]:
# https://spacy.io/usage/spacy-101

In [38]:
import spacy

# Load the small Spanish pipeline (installed with
# `python -m spacy download es_core_news_sm`).
nlp = spacy.load("es_core_news_sm")

# Two views of the same pipeline: the (name, component) pairs first, then only
# the component names, both in processing order.
# Built-in components reference: https://spacy.io/usage/processing-pipelines#built-in
for pipeline_view in (nlp.pipeline, nlp.pipe_names):
    print(pipeline_view)

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec object at 0x0000014E931F3820>), ('morphologizer', <spacy.pipeline.morphologizer.Morphologizer object at 0x0000014F9714DBE0>), ('parser', <spacy.pipeline.dep_parser.DependencyParser object at 0x0000014E9BA32580>), ('attribute_ruler', <spacy.pipeline.attributeruler.AttributeRuler object at 0x0000014F97949540>), ('lemmatizer', <spacy.lang.es.lemmatizer.SpanishLemmatizer object at 0x0000014E97CBF540>), ('ner', <spacy.pipeline.ner.EntityRecognizer object at 0x0000014E9BA32970>)]
['tok2vec', 'morphologizer', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']


In [36]:
# Run the whole pipeline on one sentence and print one row per token:
# text, lemma, coarse POS, tag, dependency label, shape, is_alpha, is_stop.
doc = nlp("Esto es una frase.")
for token in doc:
    annotations = (
        token.text,
        token.lemma_,
        token.pos_,
        token.tag_,
        token.dep_,
        token.shape_,
        token.is_alpha,
        token.is_stop,
    )
    print(*annotations)

Esto este PRON PRON nsubj Xxxx True True
es ser AUX AUX cop xx True True
una uno DET DET det xxx True True
frase frase NOUN NOUN ROOT xxxx True False
. . PUNCT PUNCT punct . False False


In [19]:
# Start from a tokenized-only Doc and apply each pipeline component by hand,
# printing the first token's annotations after every step to see which
# component fills in which attribute.
doc = nlp.make_doc("Esto es una frase.")
for name, proc in nlp.pipeline:
    print(name)
    doc = proc(doc)
    token = doc[0]
    print(
        token.text,
        token.lemma_,
        token.pos_,
        token.tag_,
        token.dep_,
        token.shape_,
        token.is_alpha,
        token.is_stop,
    )

# What each component contributes (as seen in the recorded output):
# tok2vec: Assign token-to-vector embeddings (no visible attribute changes).
# morphologizer: Assign morphological features and coarse-grained POS tags (pos_).
# parser: Assign dependency labels (dep_).
# attribute_ruler: fills in tag_.
# lemmatizer: fills in lemma_.

tok2vec
Esto     Xxxx True True
morphologizer
Esto  PRON   Xxxx True True
parser
Esto  PRON  nsubj Xxxx True True
attribute_ruler
Esto  PRON PRON nsubj Xxxx True True
lemmatizer
Esto este PRON PRON nsubj Xxxx True True
ner
Esto este PRON PRON nsubj Xxxx True True


In [21]:
# Several texts can be processed as a stream with nlp.pipe, which yields one
# Doc per input text.
textos = [
    "Esta es la primera frase.",
    "Y esta es la segunda",
]

docs = nlp.pipe(textos)
for doc in docs:
    print(doc)

Esta es la primera frase.
Y esta es la segunda


### Habilitar, deshabilitar y seleccionar componentes

In [38]:
MODEL_NAME = "es_core_news_sm"

# 1) Default load: every component of the packaged pipeline is present.
nlp = spacy.load(MODEL_NAME)
print(nlp.pipe_names)

# 2) `exclude`: the listed component is left out of the pipeline altogether.
nlp = spacy.load(MODEL_NAME, exclude=["ner"])
print(nlp.pipe_names)

# 3) `disable`: the component is switched off at load time and can be turned
#    back on later with enable_pipe, after which it is listed again.
nlp = spacy.load(MODEL_NAME, disable=["ner"])
nlp.enable_pipe("ner")
print(nlp.pipe_names)

['tok2vec', 'morphologizer', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']
['tok2vec', 'morphologizer', 'parser', 'attribute_ruler', 'lemmatizer']
['tok2vec', 'morphologizer', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']


In [37]:
nlp = spacy.load("es_core_news_sm")

# select_pipes temporarily keeps only the listed components active. The object
# it returns can be used as a context manager: leaving the block restores the
# disabled components, equivalent to calling `.restore()` on it.
with nlp.select_pipes(enable=["tok2vec", "morphologizer"]) as disabled:
    print(nlp.pipe_names)

# Back to the full pipeline.
print(nlp.pipe_names)

['tok2vec', 'morphologizer']
['tok2vec', 'morphologizer', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']


### Tokenización

In [4]:
# Token attributes reference: https://spacy.io/usage/spacy-101#annotations-token
nlp = spacy.load("es_core_news_sm")

# Print one token per line; note in the output that the abbreviation "U.K."
# is kept as a single token.
doc = nlp("Londres se encuentra en U.K.")
for word in doc:
    print(word.text)

Londres
se
encuentra
en
U.K.


### Tags de POS y dependencias

In [7]:
doc = nlp("El perro come su comida")

# Token attributes to print, in output-column order.
TOKEN_FIELDS = ("text", "lemma_", "pos_", "tag_", "dep_", "shape_", "is_alpha", "is_stop")

# NOTE: in the recorded output the small model tags "perro" as PROPN and
# lemmatizes "come" as "comar"; these are model errors, not printing errors.
for token in doc:
    print(*(getattr(token, field) for field in TOKEN_FIELDS))

El el DET DET det Xx True True
perro perro PROPN PROPN nsubj xxxx True False
come comar VERB VERB ROOT xxxx True False
su su DET DET det xx True True
comida comida NOUN NOUN obj xxxx True False


### NER

In [14]:
doc = nlp("Sundar Pichai es el director ejecutivo de Alphabet Inc, y vive en Estados Unidos")

# One row per recognized entity: text, start/end character offsets, label.
# NOTE: in the recorded output the LOC span also swallows the preposition "en".
entities = doc.ents
for ent in entities:
    span_info = (ent.text, ent.start_char, ent.end_char, ent.label_)
    print(*span_info)

Sundar Pichai 0 13 PER
Alphabet Inc 42 54 ORG
en Estados Unidos 63 80 LOC


### Word Embeddings y Similitud

In [None]:
# !python -m spacy download es_core_news_lg

In [26]:
# The large Spanish model is loaded here (this rebinds `nlp`).
nlp = spacy.load("es_core_news_lg")
doc = nlp("perro gato banana afskfsd")

# Per token: text, whether it has a vector, the vector's norm, and whether it
# is out-of-vocabulary. The made-up word is expected to have no vector.
for token in doc:
    vector_info = (token.text, token.has_vector, token.vector_norm, token.is_oov)
    print(*vector_info)

# print(next(iter(doc)).vector)

perro True 32.17176 False
gato True 35.150936 False
banana True 18.258509 False
afskfsd False 0.0 True


In [29]:
# Compare one reference sentence against two others using Doc.similarity.
doc1 = nlp("Me gustan las hamburguesas")
doc2 = nlp("Ayer comí panchos con queso")
doc3 = nlp("Ayer tomé un daikiri")

for other in (doc2, doc3):
    print(doc1, "<->", other, doc1.similarity(other))

Me gustan las hamburguesas <-> Ayer comí panchos con queso 0.2322320265491657
Me gustan las hamburguesas <-> Ayer tomé un daikiri 0.0053885496820099245


In [32]:
# Single-word similarity: "perro" against three other words.
doc1 = nlp("perro")
doc2 = nlp("gato")
doc3 = nlp("pez")
doc4 = nlp("avion")

for other in (doc2, doc3, doc4):
    print(doc1, "<->", other, doc1.similarity(other))

perro <-> gato 0.8487286452393603
perro <-> pez 0.42226599731024445
perro <-> avion 0.10443127598489513


In [None]:
# !python -m spacy download en_core_web_lg

In [33]:
# Same similarity experiment with the large English model (this rebinds `nlp`).
nlp = spacy.load("en_core_web_lg")

doc1 = nlp("I like salty fries and hamburgers.")
doc2 = nlp("Fast food tastes very good.")

score = doc1.similarity(doc2)
print(doc1, "<->", doc2, score)

I like salty fries and hamburgers. <-> Fast food tastes very good. 0.7687607012190486


## DisplaCy

### Dependency parsing

In [41]:
import spacy
from spacy import displacy

# Back to the small Spanish model (this rebinds `nlp`).
nlp = spacy.load("es_core_news_sm")
doc = nlp("El perro come su comida.")

# Dependency-visualizer settings (meanings per the displaCy documentation).
options = {
    "compact": False,         # regular (non-compact) arrow layout
    "distance": 120,          # spacing between words
    "fine_grained": True,     # show fine-grained tags instead of coarse POS
    "collapse_punct": False,  # keep punctuation as separate tokens
    "add_lemma": False,       # do not add a row with lemmas
}

displacy.render(doc, style="dep", options=options)

### NER tagging

In [12]:
# Highlight the named entities of a sentence with displaCy's entity view.
nlp = spacy.load("es_core_news_sm")

ner_text = "Sundar Pichai es el director ejecutivo de Alphabet Inc, y vive en Estados Unidos"
doc = nlp(ner_text)

displacy.render(doc, style="ent")

### Universal Dependencies

In [None]:
# !pip install stanza spacy_stanza

In [None]:
# # https://spacy.io/universe/project/spacy-stanza
# import stanza
# import spacy_stanza
# from spacy import displacy

# stanza.download(lang="en", model_dir="./stanza_models")

In [None]:
# nlp = spacy_stanza.load_pipeline(name="en", dir="./stanza_models")
# doc = nlp("Rats are various medium-sized, long-tailed rodents.")
# options = { "compact": True,
#             "distance": 120,
#             "fine_grained": True,
#             "collapse_punct": False,
#             "add_lemma": True,
#             }
# displacy.render(doc, style="dep", options=options)