In [0]:
import os

# Project root. The "/content/drive/My Drive" prefix is presumably the Colab
# Google Drive mount point, so Drive has to be mounted before this cell runs.
path = "/content/drive/My Drive/NLP/sentiment_compete"

# Fail early with an actionable message instead of the bare os.chdir error.
if not os.path.isdir(path):
    raise FileNotFoundError(
        f"Project directory not found: {path!r}. "
        "Mount Google Drive (or adjust `path`) before running this notebook."
    )

# Make the project root the working directory so that relative paths used by
# the following cells resolve against it.
os.chdir(path)

In [0]:
!pip install transformers



In [0]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [0]:
from processData import DataProcessor
from transformers import BertTokenizer,BertConfig
from transformers import BertModel
from config import Config
import torch
import pandas as pd
import random
import numpy as np
from tqdm import tqdm
from executor import ModelExcuter
from model.bert_model_base import BertModel_Base
from model.robert_model_base import RoBertModel_Base
from model.ernie_model_base import ErnieModel_Base
from model.ernie_model_pool_last3 import Ernie_PoolLast3_Model

# Single seed shared by every random number generator used in the notebook.
SEED = 12345

# Seed Python's, NumPy's and PyTorch's generators.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Explicitly seed all CUDA devices as well; this call is silently ignored
# when CUDA is not available.
torch.cuda.manual_seed_all(SEED)

# Ask cuDNN for deterministic kernels and keep its autotuner off, since the
# autotuner may pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [0]:
# Project-wide settings object (paths, device, max sequence length, ...).
config = Config()

# Load the encoder configuration and request the hidden states of every layer
# in the model output.
bert_config = BertConfig.from_pretrained(
    config.bert_config_path,
    output_hidden_states=True,
)

# NOTE(review): the recorded output shows a deprecation warning because
# `bert_vocab_path` points at a single file; passing the containing directory
# may be the intended replacement -- confirm against the checkpoint layout.
tokenizer = BertTokenizer.from_pretrained(config.bert_vocab_path)

Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated


In [0]:
# Build the feature extractor from the tokenizer, the maximum sequence length
# and the target device taken from the project config.
dataProcessor = DataProcessor(
    tokenizer,
    config.max_seq_len,
    config.device,
)

In [0]:
# Convert the training split and the dev split into model-ready features.
# The recorded progress bars show 81098 training and 9011 dev examples.
train_dataset = dataProcessor.get_train_feature(
    config.train_data_path,
)
dev_dataset = dataProcessor.get_dev_feature(
    config.dev_data_path,
)

100%|██████████| 81098/81098 [00:47<00:00, 1715.96it/s]
100%|██████████| 9011/9011 [00:05<00:00, 1791.13it/s]


In [0]:
# Featurize the test split. The call returns a pair: the features and a second
# object that is later indexed by column name (presumably the raw test
# DataFrame -- confirm in processData).
test_dataset, test_df = dataProcessor.get_test_feature(
    config.test_data_path,
)

100%|██████████| 10000/10000 [00:05<00:00, 1778.04it/s]


In [0]:
# Candidate 1: RoBertModel_Base, moved to the configured device.
# NOTE(review): two later cells also assign `model`, so on a top-to-bottom run
# this instance is replaced before training. Run only the cell for the model
# you want; `config.bert_model_path` presumably has to match it.
model = RoBertModel_Base(
    config.bert_model_path,
    bert_config,
    config.max_seq_len,
).to(config.device)

Linear
Linear


In [0]:
# Candidate 2: Ernie_PoolLast3_Model, moved to the configured device.
# NOTE(review): this overwrites any `model` created by the previous cell and is
# itself overwritten by the next cell when the notebook is run in order.
model = Ernie_PoolLast3_Model(
    config.bert_model_path,
    bert_config,
    config.max_seq_len,
).to(config.device)

Linear


In [0]:
# Candidate 3: BertModel_Base, moved to the configured device.
# NOTE(review): this is the last assignment to `model`, so it is the instance
# that gets trained when the notebook is executed top to bottom.
model = BertModel_Base(
    config.bert_model_path,
    bert_config,
    config.max_seq_len,
).to(config.device)

In [0]:
# Create the train/evaluate/predict driver from the training split, the dev
# split and the project config.
modelExcuter = ModelExcuter(
    train_dataset,
    dev_dataset,
    config,
)

In [0]:
# Fine-tune the currently selected `model`. The recorded log shows 3 epochs
# with train/dev loss, accuracy and F1 reported every 500 iterations; the
# trailing `*` presumably marks a new best dev score -- confirm in executor.
modelExcuter.train(model)

epoch [1/3]
Epoch: 1 Iter:    500, Train Loss:  0.74, Train Acc: 56.250%, Dev Loss:  0.67, Dev Acc: 70.758%, f1_score: 0.6589965, Time: 0:03:44 *
Epoch: 1 Iter:   1000, Train Loss:  0.46, Train Acc: 87.500%, Dev Loss:  0.63, Dev Acc: 72.012%, f1_score: 0.689708, Time: 0:09:00 *
Epoch: 1 Iter:   1500, Train Loss:  0.81, Train Acc: 81.250%, Dev Loss:  0.62, Dev Acc: 73.211%, f1_score: 0.6985684, Time: 0:14:16 *
Epoch: 1 Iter:   2000, Train Loss:   1.0, Train Acc: 50.000%, Dev Loss:   0.6, Dev Acc: 72.977%, f1_score: 0.6984105, Time: 0:19:31  
Epoch: 1 Iter:   2500, Train Loss:  0.66, Train Acc: 68.750%, Dev Loss:   0.6, Dev Acc: 73.477%, f1_score: 0.7056401, Time: 0:24:45 *
Epoch: 1 Iter:   3000, Train Loss:  0.46, Train Acc: 81.250%, Dev Loss:   0.6, Dev Acc: 73.732%, f1_score: 0.7084117, Time: 0:30:00 *
Epoch: 1 Iter:   3500, Train Loss:   0.4, Train Acc: 87.500%, Dev Loss:   0.6, Dev Acc: 73.510%, f1_score: 0.7092664, Time: 0:35:16 *
Epoch: 1 Iter:   4000, Train Loss:  0.59, Train Acc

In [0]:
# Run inference on the test features, passing along the '微博id' (Weibo post
# id) column of the test frame. What predict() does with the ids and where it
# stores the result is defined in executor -- presumably a submission file.
modelExcuter.predict(
    model,
    test_dataset,
    test_df['微博id'],
)



  0%|          | 0/625 [00:00<?, ?it/s][A[A

  0%|          | 1/625 [00:00<02:02,  5.09it/s][A[A

  0%|          | 2/625 [00:00<01:55,  5.41it/s][A[A

  0%|          | 3/625 [00:00<01:49,  5.66it/s][A[A

  1%|          | 4/625 [00:00<01:46,  5.84it/s][A[A

  1%|          | 5/625 [00:00<01:44,  5.95it/s][A[A

  1%|          | 6/625 [00:01<01:45,  5.88it/s][A[A

  1%|          | 7/625 [00:01<01:47,  5.73it/s][A[A

  1%|▏         | 8/625 [00:01<01:45,  5.85it/s][A[A

  1%|▏         | 9/625 [00:01<01:42,  5.99it/s][A[A

  2%|▏         | 10/625 [00:01<01:41,  6.09it/s][A[A

  2%|▏         | 11/625 [00:01<01:40,  6.14it/s][A[A

  2%|▏         | 12/625 [00:02<01:41,  6.02it/s][A[A

  2%|▏         | 13/625 [00:02<01:44,  5.84it/s][A[A

  2%|▏         | 14/625 [00:02<01:44,  5.85it/s][A[A

  2%|▏         | 15/625 [00:02<01:42,  5.93it/s][A[A

  3%|▎         | 16/625 [00:02<01:41,  6.01it/s][A[A

  3%|▎         | 17/625 [00:02<01:39,  6.08it/s][A[A

  3%|▎  

Time usage: 0:01:42
finish !





In [0]:
# Diagnostic: print the GPU status table (driver/CUDA versions, memory usage,
# utilization and the processes currently holding GPU memory).
!nvidia-smi

Tue Apr  7 14:46:52 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.64.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P8    10W /  70W |      0MiB / 15079MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [0]:
# Diagnostic: list the processes running on the VM with their CPU and memory
# usage, to find out which one is holding resources.
!ps -aux

USER         PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root           1  0.0  0.0  39192  6384 ?        Ss   09:54   0:00 /bin/bash -e 
root           8  0.0  0.4 691860 62368 ?        Sl   09:54   0:11 /tools/node/b
root          23  0.2  0.7 405724 101704 ?       Sl   09:54   0:29 /usr/bin/pyth
root         113  0.0  0.0  35884  4748 ?        Ss   09:55   0:00 tail -n +0 -F
root         122 81.2 37.2 34854896 4972520 ?    Ssl  09:55 169:14 /usr/bin/pyth
root         157  0.0  0.0  18376  1480 ?        S    09:55   0:00 /bin/bash --n
root         158  0.0  0.1 2174204 15780 ?       Sl   09:55   0:00 /opt/google/d
root         159  0.0  0.0  11464  1004 ?        S    09:55   0:00 grep --color=
root         162  0.4  0.6 2447308 83276 ?       Sl   09:55   0:54 /opt/google/d
root         214  0.0  0.0  18376  2988 ?        S    09:55   0:00 bash -c tail 
root         215  0.0  0.0   4568   804 ?        S    09:55   0:00 tail -n +0 -F
root         216  0.0  0.0  11464 

In [0]:
# Force-kill (SIGKILL) the process with PID 122.
# NOTE(review): the PID is hard-coded from the `ps` listing of one particular
# session, where PID 122 was a python process with the highest CPU and memory
# usage -- possibly the notebook kernel itself. In any other session this PID
# may belong to a different process, so do not include this cell in
# "Run All"; look up the PID again before using it.
!kill -9 122