## Info:
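To run this notebook you need to install `transformers`, `torch`, `langchain`, `bitsandbytes` and `accelerate` (required for `device_map='auto'`), plus `nltk` and `tqdm`, which are imported in the first cell.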

## Instantiating the model

In [9]:
from langchain import HuggingFacePipeline
from langchain import PromptTemplate,  LLMChain
from transformers import AutoTokenizer, AutoModelForCausalLM
from nltk.tokenize import sent_tokenize
import transformers
from torch import cuda, bfloat16
import torch
import csv
import re
import json
from tqdm import tqdm
import time

# Checkpoint identifier passed to `from_pretrained` (here and for the tokenizer).
model_name = "Universal-NER/UniNER-7B-all"

# Label used only in the status message printed at the end of this cell.
# It is not passed to the loader: placement is left to `device_map='auto'`.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# 4-bit NF4 quantization with double quantization and bfloat16 compute,
# so the large model fits in less GPU memory.
# This requires the `bitsandbytes` library.
bnb_config = transformers.BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    load_in_4bit=True,
)

# Load the quantized causal LM and switch it to inference mode.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map='auto',
    quantization_config=bnb_config,
    trust_remote_code=True,
)
model.eval()
print(f"Model loaded on {device}")

Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:19<00:00,  6.51s/it]

Model loaded on cuda:1





In [2]:
# Tokenizer belonging to the same checkpoint as the model loaded above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Text-generation pipeline around the already-loaded model.
#
# Decoding is greedy (`do_sample=False`) so that the same text and entity type
# always produce the same extraction. The previous configuration sampled
# (`do_sample=True`, `top_k=10`), which made results vary between runs even
# though a temperature of 0 was requested further down.
#
# `max_new_tokens` bounds only the generated answer. The former
# `max_length=1000` also counted the prompt tokens, so a long input text could
# leave little or no room for the answer.
# NOTE(review): 256 new tokens is assumed to be enough for an entity list;
# raise it if answers get cut off.
pipeline = transformers.pipeline(
    "text-generation",  # task
    model=model,
    tokenizer=tokenizer,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    max_new_tokens=256,
    do_sample=False,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

# LangChain wrapper around the pipeline.
# NOTE(review): `model_kwargs` does not appear to be forwarded to generation
# when a ready-made pipeline is supplied, so determinism is enforced via
# `do_sample=False` above rather than via 'temperature' here. Confirm against
# the installed langchain version.
llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0, "batch_size":1})

In [3]:
# Conversation-style prompt with two placeholders: `{input_text}` (the passage
# to analyse) and `{entity_type}` (the kind of entity to extract).
# The literal is whitespace-sensitive (leading newline and indentation are sent
# to the model as-is), so it is kept exactly as written.
template = """
              A virtual assistant answers questions from a user based on the provided text.
              USER: Text: {input_text}
              ASSISTANT: I’ve read this text.
              USER: What describes {entity_type} in the text?
              ASSISTANT:
           """

# Bind the placeholders to a prompt object, then chain prompt -> llm.
prompt = PromptTemplate(
    input_variables=["input_text", "entity_type"],
    template=template,
)
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [10]:
# Example passage; adjacent string literals are joined into one string.
sentence = (
    "In the tumultuous era of the Baroque, Caravaggio's innovative use of "
    "dramatic lighting and his penchant for portraying ordinary subjects with "
    "extraordinary realism revolutionized the art world, ushering in a new era "
    "of naturalism that would influence countless painters for generations to "
    "come, his 'Judith Beheading Holofernes' stands as an epitome of this "
    "artistic transformation, where the blood-red drapery of Judith's dress "
    "contrasts starkly with the dark, foreboding background, intensifying the "
    "emotional impact of the brutal scene."
)
# Entity type to ask the model about.
_type = "Artwork"

In [11]:
# Fill both prompt placeholders and run the chain; the raw model answer is printed.
result_string = llm_chain.run(dict(input_text=sentence, entity_type=_type))
print(result_string)

 ["Judith Beheading Holofernes"]


In [15]:
# Same example passage as before, this time queried with the generic "Entity" type.
sentence = (
    "In the tumultuous era of the Baroque, Caravaggio's innovative use of "
    "dramatic lighting and his penchant for portraying ordinary subjects with "
    "extraordinary realism revolutionized the art world, ushering in a new era "
    "of naturalism that would influence countless painters for generations to "
    "come, his 'Judith Beheading Holofernes' stands as an epitome of this "
    "artistic transformation, where the blood-red drapery of Judith's dress "
    "contrasts starkly with the dark, foreboding background, intensifying the "
    "emotional impact of the brutal scene."
)
_type = "Entity"
# Fill both prompt placeholders and run the chain; the raw model answer is printed.
result_string = llm_chain.run(dict(input_text=sentence, entity_type=_type))
print(result_string)

 []
