In [1]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for peft (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for accelerate (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━

In [3]:
import torch
from transformers import BertTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig, DataCollatorWithPadding, pipeline
from peft import PeftConfig, PeftModel, PeftModelForSequenceClassification
from huggingface_hub import hf_hub_download

Load The Model

In [4]:
# Section labels of a PubMed abstract sentence; a label's position in the tuple is its class id.
lab2id = {
    label: index
    for index, label in enumerate(
        ("RESULTS", "METHODS", "CONCLUSIONS", "BACKGROUND", "OBJECTIVE")
    )
}

# Inverse lookup (class id -> label name), derived from lab2id so the two never disagree.
id2lab = dict(zip(lab2id.values(), lab2id.keys()))

In [8]:
# Hugging Face Hub locations:
#   peft_model - repository the PEFT adapter (and its config) is loaded from
#   model_id   - repository the state-dict file `filename` is downloaded from
peft_model = "ManuelAlv/PubMed_Classify_ClinicalBert_adapters"
model_id = "ManuelAlv/PubMed_Classify_ClinicalBert"
filename = "pytorch_model.bin"

# BNB CONFIG
# 4-bit NF4 quantization with double quantization; compute is done in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# LOAD MODEL
# The adapter config records which base checkpoint the adapter was trained on.
config = PeftConfig.from_pretrained(peft_model)

# 4-bit loading is requested solely through `quantization_config`: passing
# `load_in_4bit=True` as an extra keyword alongside it is redundant, and recent
# transformers releases reject that combination with a ValueError.
model_original = AutoModelForSequenceClassification.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    id2label=id2lab,
    label2id=lab2id,
    # NUMBER OF CLASSES (derived from the label map so the two cannot drift apart)
    num_labels=len(lab2id),
    # Place the whole model on GPU 0.
    device_map={"": 0},
)

# Wrap the quantized base model with the trained adapter.
model = PeftModel.from_pretrained(model_original, peft_model)

# Load the weights
# SECURITY: torch.load unpickles the downloaded file, which can execute arbitrary
# code — only load checkpoints from a repository you trust.
state_dict = torch.load(hf_hub_download(model_id, filename))
model.load_state_dict(state_dict)
# NOTE(review): the base checkpoint ships without a classification head (transformers
# reports `classifier.weight` / `classifier.bias` as newly initialized), so the trained
# head presumably has to come from the adapter or this state dict — confirm that it
# does; a near-uniform top score over 5 classes would suggest it is not being loaded.

# LOAD TOKENIZER
tokenizer = BertTokenizer.from_pretrained(config.base_model_name_or_path)

# LOAD DATA COLLATOR
# Pads each batch dynamically using the tokenizer's padding settings.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at emilyalsentzer/Bio_ClinicalBERT and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Use the Model

In [9]:
# Build a text-classification pipeline around the adapter-wrapped model and its tokenizer.
# transformers logs that 'PeftModel' is not in its list of supported classes for this
# task; in the recorded run the pipeline was still created and returned a prediction.
category_classifier = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
)

The model 'PeftModel' is not supported for text-classification. Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'LlamaForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceClassification', 'ErnieForSequenceClassification', 'ErnieMForSequenceClassification', 'EsmForSequenceClassification', 'FalconForSequenceClassification', 'FlaubertForSequenceClassification', 'FNetForSequenceClassification', 'FunnelForSequenceClassification', 'GPT2ForSequenceClassification', 'GPT2ForSequenceClassifi

In [14]:
# Example sentence to classify. The spacing around punctuation is part of the input
# and is kept exactly as-is; the literal is only split across lines for readability.
text = (
    "The 30-day mortality rate was 21 per cent after EVAR and 25 per cent for OR : "
    "absolute risk reduction ( ARR ) 44 ( 95 per cent confidence interval ( c.i. ) "
    "-110 to 197 ) per cent ."
)
# Last expression of the cell: displays the pipeline's prediction for `text`.
category_classifier(text)

[{'label': 'CONCLUSIONS', 'score': 0.24259960651397705}]