In [1]:
# Check which GPU is available
!nvidia-smi

Thu Apr  8 03:22:46 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.67       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# Check the installed CUDA toolkit version
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Wed_Jul_22_19:09:09_PDT_2020
Cuda compilation tools, release 11.0, V11.0.221
Build cuda_11.0_bu.TC445_37.28845127_0


In [3]:
# Instala o CuPy, uma biblioteca especializada para trabalhar com matrizes e CUDA
# Nesse caso, instalamos a mesma versão compatível com CUDA 11.0
!pip install -U --quiet pip setuptools wheel
!pip --quiet install cupy-cuda110

[K     |████████████████████████████████| 1.5MB 7.5MB/s 
[K     |████████████████████████████████| 165.3 MB 41 kB/s 
[?25h

In [None]:
# Atualiza o PyTorch de acordo com a versão CUDA
!pip install --quiet torch==1.8.1+cu102 torchvision==0.9.1+cu102 torchaudio===0.8.1 -f https://download.pytorch.org/whl/torch_stable.html

In [4]:
# Atualiza o spaCy de acordo com a versão CUDA
!pip install -U --quiet pip setuptools wheel
!pip install -U --quiet spacy[cuda110]

# Instala o modelo transformers
!python -m spacy download en_core_web_trf --quiet

[K     |████████████████████████████████| 12.8 MB 247 kB/s 
[K     |████████████████████████████████| 456 kB 56.7 MB/s 
[K     |████████████████████████████████| 9.1 MB 71.3 MB/s 
[K     |████████████████████████████████| 1.1 MB 58.5 MB/s 
[K     |████████████████████████████████| 113 kB 81.1 MB/s 
[?25h  Building wheel for smart-open (setup.py) ... [?25l[?25hdone
2021-04-08 03:26:22.078232: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
[K     |████████████████████████████████| 459.7 MB 17 kB/s 
[K     |████████████████████████████████| 977 kB 6.9 MB/s 
[K     |████████████████████████████████| 66 kB 4.6 MB/s 
[K     |████████████████████████████████| 1.8 MB 19.7 MB/s 
[K     |████████████████████████████████| 862 kB 54.7 MB/s 
[K     |████████████████████████████████| 2.9 MB 59.5 MB/s 
[?25h  Building wheel for ftfy (setup.py) ... [?25l[?25hdone
  Building wheel for torchcontrib (setup.py) ... [?

In [5]:
# Verify that the upgrades were applied correctly and that the installed
# pipelines are compatible with the installed spaCy version
!python -m spacy info
!python -m spacy validate

2021-04-08 03:28:23.723281: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
[1m

spaCy version    3.0.5                         
Location         /usr/local/lib/python3.7/dist-packages/spacy
Platform         Linux-4.19.112+-x86_64-with-Ubuntu-18.04-bionic
Python version   3.7.10                        
Pipelines        en_core_web_trf (3.0.0)       

2021-04-08 03:28:27.261740: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
[2K[38;5;2m✔ Loaded compatibility table[0m
[1m
[38;5;4mℹ spaCy installation: /usr/local/lib/python3.7/dist-packages/spacy[0m

NAME              SPACY            VERSION                            
en_core_web_trf   >=3.0.0,<3.1.0   [38;5;2m3.0.0[0m   [38;5;2m✔[0m



In [6]:
# Import the required libraries and modules
import pandas as pd
import spacy
import torch

# Display the installed PyTorch version (the +cuXXX suffix identifies the CUDA build)
torch.__version__

'1.8.1+cu101'

In [8]:
# Create the training/validation configuration file (config.cfg)
# by auto-filling ./base_config.cfg
!python -m spacy init fill-config ./base_config.cfg ./config.cfg

2021-04-08 03:36:42.006934: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
[38;5;2m✔ Auto-filled config with all values[0m
[38;5;2m✔ Saved config[0m
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


> Alterações realizadas no arquivo `config.cfg`:

    [paths]
    train = "train.spacy"
    dev = "valid.spacy"
    
    [training.logger]
    progress_bar = true

In [9]:
# Check the consistency of the training and evaluation (dev) corpora
# referenced by config.cfg
!python -m spacy debug data config.cfg

2021-04-08 03:37:25.885129: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
[1m
[38;5;2m✔ Corpus is loadable[0m
Downloading: 100% 481/481 [00:00<00:00, 576kB/s]
Downloading: 100% 899k/899k [00:00<00:00, 2.40MB/s]
Downloading: 100% 456k/456k [00:00<00:00, 1.46MB/s]
Downloading: 100% 1.36M/1.36M [00:00<00:00, 3.49MB/s]
Downloading: 100% 501M/501M [00:14<00:00, 35.3MB/s]
[38;5;2m✔ Pipeline can be initialized with data[0m
[1m
Language: en
Training pipeline: transformer, ner
10713 training docs
6348 evaluation docs
[38;5;2m✔ No overlap between training and evaluation data[0m
[1m
[38;5;4mℹ 2493128 total word(s) in the data (73563 unique)[0m
[38;5;4mℹ No word vectors present in the package[0m
[1m
[38;5;4mℹ 0 new label(s), 4 existing label(s)[0m
0 missing value(s) (tokens with '-' label)
[38;5;3m⚠ 1486 entity span(s) with punctuation[0m
[38;5;2m✔ Good amount of examples for all labels[0m
[38;5;2m✔ Exampl

In [10]:
%%time
# Train and validate the model using the corpora configured under [paths]
# at the top of config.cfg, e.g.:
# [paths]
# train = train.spacy
# dev = valid.spacy
# NOTE(review): this comment originally listed dev = test.spacy, while the config notes
# in this notebook show dev = "valid.spacy". Confirm which one is in config.cfg:
# test.spacy is used for the final `spacy evaluate` step and should stay held out.
!python -m spacy train config.cfg --gpu-id 0 --output ./

2021-04-08 03:40:50.266190: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
[38;5;4mℹ Using GPU: 0[0m
[1m
[2021-04-08 03:40:52,702] [INFO] Set up nlp object from config
[2021-04-08 03:40:52,711] [INFO] Pipeline: ['transformer', 'ner']
[2021-04-08 03:40:52,715] [INFO] Created vocabulary
[2021-04-08 03:40:52,715] [INFO] Finished initializing nlp object
[2021-04-08 03:41:46,752] [INFO] Initialized pipeline components: ['transformer', 'ner']
[38;5;2m✔ Initialized pipeline[0m
[1m
[38;5;4mℹ Pipeline: ['transformer', 'ner'][0m
[38;5;4mℹ Initial learn rate: 0.0[0m
E    #       LOSS TRANS...  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  -------------  --------  ------  ------  ------  ------
  0       0         363.10    486.90    0.51    0.26    7.17    0.01
  0     200      322538.71  84292.89   20.07   33.19   14.38    0.20
  0     400       33503.87  20539.71   57.62   50.11   67.79    0.58
  0     600 

In [12]:
# Evaluate the best model on the test.spacy file, running on GPU 0
!python -m spacy evaluate ./model-best ./test.spacy -g 0

2021-04-08 06:46:45.452523: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
[38;5;4mℹ Using GPU: 0[0m
[1m

TOK     -    
NER P   71.63
NER R   69.81
NER F   70.71
SPEED   5007 

[1m

                   P       R       F
DRUG_PROTEIN   72.79   61.54   66.69
CHEMICALS      72.47   83.66   77.66
SPECIES        56.36   62.10   59.09
DISEASE        72.83   13.84   23.26



In [14]:
# Sample text (a paper abstract, PMID 30429607) that is run through the custom pipeline below.
corpus = """Autophagy maintains tumour growth through circulating arginine. Autophagy captures intracellular components and delivers them to lysosomes, 
            where they are degraded and recycled to sustain metabolism and to enable survival during starvation1-5. Acute, whole-body deletion of the essential
            autophagy gene Atg7 in adult mice causes a systemic metabolic defect that manifests as starvation intolerance and gradual loss of white adipose tissue,
            liver glycogen and muscle mass1. Cancer cells also benefit from autophagy. Deletion of essential autophagy genes impairs the metabolism, proliferation,
            survival and malignancy of spontaneous tumours in models of autochthonous cancer6,7. Acute, systemic deletion of Atg7 or acute, systemic expression of
            a dominant-negative ATG4b in mice induces greater regression of KRAS-driven cancers than does tumour-specific autophagy deletion, which suggests that
            host autophagy promotes tumour growth1,8. Here we show that host-specific deletion of Atg7 impairs the growth of multiple allografted tumours,
            although not all tumour lines were sensitive to host autophagy status. Loss of autophagy in the host was associated with a reduction in circulating arginine,
            and the sensitive tumour cell lines were arginine auxotrophs owing to the lack of expression of the enzyme argininosuccinate synthase 1.
            Serum proteomic analysis identified the arginine-degrading enzyme arginase I (ARG1) in the circulation of Atg7-deficient hosts,
            and in vivo arginine metabolic tracing demonstrated that serum arginine was degraded to ornithine. ARG1 is predominantly expressed in the liver and can be
            released from hepatocytes into the circulation. Liver-specific deletion of Atg7 produced circulating ARG1, and reduced both serum arginine and tumour growth.
            Deletion of Atg5 in the host similarly regulated [corrected] circulating arginine and suppressed tumorigenesis, which demonstrates that this phenotype
            is specific to autophagy function rather than to deletion of Atg7. Dietary supplementation of Atg7-deficient hosts with arginine partially restored
            levels of circulating arginine and tumour growth.
            Thus, defective autophagy in the host leads to the release of ARG1 from the liver and the degradation of circulating arginine, which is essential
            for tumour growth; this identifies a metabolic vulnerability of cancer. (PMID:30429607)"""

In [15]:
# Ask spaCy to use the GPU when one is available, then load the custom
# pipeline stored in ./model-best into an `nlp` object.
spacy.prefer_gpu()
nlp = spacy.load("./model-best")

# Run the sample text through the pipeline.
doc = nlp(corpus)

In [16]:
# Display settings for the entity visualizer: which labels to highlight
# and the background colour used for each of them.
options = dict(
    ents=["CHEMICALS", "DRUG_PROTEIN", "DISEASE", "SPECIES"],
    colors={
        "CHEMICALS": "#D2B4DE",
        "DRUG_PROTEIN": "#82E0AA",
        "DISEASE": "#D7BDE2",
        "SPECIES": "#A2D9CE",
    },
)

# Render the recognised entities inline in the notebook.
spacy.displacy.render(doc, style="ent", jupyter=True, options=options)

In [22]:
# # Load the dataset used for inference
# corona_dataset = pd.read_csv('./corona_dataset.csv')

# # Drop unneeded columns
# corona_dataset.drop(['Unnamed: 0', 'Unnamed: 0.1'], axis='columns',
#                     inplace=True)

# # Drop rows with missing data
# corona_dataset.dropna(inplace=True)

# # Show the first rows of the dataset
# corona_dataset.head()

In [21]:
# corpus = ' '.join(corona_dataset['body'])

# # Process the text
# doc = nlp(corpus)

# spacy.displacy.render(doc, style="ent", jupyter=True, options=options)