In [1]:
from transformers import AutoModelForTokenClassification, AutoTokenizer
import torch

# Load the tokenizer and the token-classification model for this checkpoint.
model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Sentence to tag.
text = "Hugging Face is a technology company based in New York."

# Encode the sentence as PyTorch tensors.
inputs = tokenizer(text, return_tensors="pt")

# Run the forward pass without tracking gradients (inference only).
with torch.no_grad():
    logits = model(**inputs).logits

# Take the highest-scoring class per token and translate each class id into
# its label name via the model config.
predicted_token_classes = logits.argmax(-1)
tokens = inputs.tokens()
id2label = model.config.id2label
# Index 0 selects the single sentence in the batch.
predictions = [id2label[class_id] for class_id in predicted_token_classes[0].tolist()]

# Print one "token: label" line per token.
for token, prediction in zip(tokens, predictions):
    print(f"{token}: {prediction}")


  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[CLS]: O
Hu: I-ORG
##gging: I-ORG
Face: I-ORG
is: O
a: O
technology: O
company: O
based: O
in: O
New: I-LOC
York: I-LOC
.: O
[SEP]: O


In [2]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

# Load the tokenizer and model.
#
# NOTE: when this cell was run, transformers reported that 'classifier.bias'
# and 'classifier.weight' were "newly initialized" for this checkpoint, i.e.
# the token-classification head is NOT trained. The LABEL_0 / LABEL_1 values
# printed below therefore come from a random head and are not meaningful
# entity predictions; the model has to be fine-tuned on a down-stream NER task
# before its output can be interpreted.
#
# The RNG is seeded before loading so that the randomly initialized head (and
# hence the printed labels) is at least the same on every run.
SEED = 0
torch.manual_seed(SEED)

model_name = "bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Sentence to tag.
text = "Patient with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) is showing signs of improvement."

# Encode the sentence as PyTorch tensors.
inputs = tokenizer(text, return_tensors="pt")

# Run the forward pass without tracking gradients (inference only).
with torch.no_grad():
    outputs = model(**inputs)

# Decode: highest-scoring class per token, mapped to its label name.
# Index 0 selects the single sentence in the batch explicitly instead of
# relying on squeeze() to drop the batch dimension.
logits = outputs.logits
predicted_token_classes = logits.argmax(-1)[0].tolist()
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0].tolist())
predictions = [model.config.id2label[token_class] for token_class in predicted_token_classes]

# Print each token together with its predicted class.
for token, prediction in zip(tokens, predictions):
    print(f"{token}: {prediction}")


Some weights of BertForTokenClassification were not initialized from the model checkpoint at bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CLS]: LABEL_1
patient: LABEL_1
with: LABEL_0
severe: LABEL_0
acute: LABEL_1
respiratory: LABEL_1
syndrome: LABEL_0
corona: LABEL_1
##virus: LABEL_1
2: LABEL_1
(: LABEL_1
sar: LABEL_1
##s: LABEL_1
-: LABEL_1
co: LABEL_0
##v: LABEL_1
-: LABEL_1
2: LABEL_1
): LABEL_1
is: LABEL_0
showing: LABEL_0
signs: LABEL_0
of: LABEL_0
improvement: LABEL_0
.: LABEL_0
[SEP]: LABEL_0


In [1]:
from transformers import AutoModelForTokenClassification, AutoTokenizer
import torch

# Load the tokenizer and model.
#
# NOTE: when this cell was run, transformers reported that 'classifier.bias'
# and 'classifier.weight' were "newly initialized" for this checkpoint, i.e.
# the token-classification head is NOT trained. The LABEL_* values printed
# below therefore come from a random head and are not meaningful entity
# predictions; the model has to be fine-tuned on a down-stream NER task before
# its output can be interpreted.
#
# The RNG is seeded before loading so that the randomly initialized head (and
# hence the printed labels) is at least the same on every run.
SEED = 0
torch.manual_seed(SEED)

model_name = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Sentence to tag.
text = "Recent studies on SARS-CoV-2 suggest potential vaccine targets."

# Encode the sentence as PyTorch tensors.
inputs = tokenizer(text, return_tensors="pt")

# Run the forward pass without tracking gradients (inference only).
with torch.no_grad():
    logits = model(**inputs).logits

# Decode: highest-scoring class per token, mapped to its label name.
# Index 0 selects the single sentence in the batch.
predicted_token_classes = logits.argmax(-1)
tokens = inputs.tokens()
predictions = [
    model.config.id2label[class_id]
    for class_id in predicted_token_classes[0].tolist()
]

# Print one "token: label" line per token.
for token, prediction in zip(tokens, predictions):
    print(f"{token}: {prediction}")


  from .autonotebook import tqdm as notebook_tqdm
Some weights of BertForTokenClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CLS]: LABEL_1
recent: LABEL_1
studies: LABEL_1
on: LABEL_1
sars: LABEL_1
-: LABEL_1
cov: LABEL_1
-: LABEL_1
2: LABEL_1
suggest: LABEL_1
potential: LABEL_1
vaccine: LABEL_1
targets: LABEL_1
.: LABEL_1
[SEP]: LABEL_1


In [7]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch  # imported here so this cell does not depend on an earlier cell

# cannot import name 'RoBERTaMultiNER2' from 'transformers', so the generic
# AutoModelForTokenClassification class is used to load the checkpoint.
# NOTE(review): the B/I/O tags printed by this cell look inconsistent (e.g. the
# <s> token and punctuation tagged "I", "SARS-CoV-2" tagged "O"). Presumably the
# checkpoint's classification head does not match the generic class - confirm
# before relying on these predictions.

tokenizer = AutoTokenizer.from_pretrained("dmis-lab/bern2-ner")
model = AutoModelForTokenClassification.from_pretrained("dmis-lab/bern2-ner")


def tag_tokens(text):
    """Return ``(token, label)`` pairs for every token of ``text``.

    The text is encoded with the cell-level ``tokenizer``, passed through the
    cell-level ``model`` without gradient tracking, and the highest-scoring
    class of each token is mapped to its name via ``model.config.id2label``.
    """
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    # Index 0 selects the single sentence in the batch.
    class_ids = logits.argmax(-1)[0].tolist()
    return [
        (token, model.config.id2label[class_id])
        for token, class_id in zip(inputs.tokens(), class_ids)
    ]


# Sentences to tag, processed in order with the same pipeline.
texts = [
    "Recent studies on SARS-CoV-2 suggest potential vaccine targets.",
    "Patient with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) is showing signs of improvement.",
]

# Print one "token: label" line per token, one sentence after the other.
for text in texts:
    for token, prediction in tag_tokens(text):
        print(f"{token}: {prediction}")

<s>: I
Recent: O
Ġstudies: B
Ġon: O
ĠSARS: O
-: O
CoV: O
-: O
2: O
Ġsuggest: I
Ġpotential: I
Ġvaccine: B
Ġtargets: B
.: I
</s>: O
<s>: I
Patient: O
Ġwith: O
Ġsevere: O
Ġacute: O
Ġrespiratory: O
Ġsyndrome: O
Ġcoronavirus: O
Ġ2: O
Ġ(: I
SA: O
RS: O
-: O
CoV: O
-: O
2: O
): I
Ġis: O
Ġshowing: O
Ġsigns: O
Ġof: I
Ġimprovement: I
.: I
</s>: O
