# **bigscience/bloomz-7b1**

# **Step 1: Libraries**

In [21]:
!pip install langchain accelerate bitsandbytes --quiet

In [22]:
import os
from tqdm import tqdm
import pandas as pd
import torch
import accelerate
from langchain import PromptTemplate, HuggingFacePipeline
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

# **Step 2: Load Data**

In [16]:
# Mount Google Drive at /content/drive; the CSV read later in this notebook is
# addressed by a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [17]:
# Read mtsamples.csv from the mounted Drive folder into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/Medical_Transcription_Extraction/mtsamples.csv',
)

In [18]:
# Plain Python list of the 'transcription' column, in row order.
# NOTE(review): rows with a missing transcription would come through as NaN
# rather than text — confirm the column has no nulls.
transcription_list = df['transcription'].tolist()

# **Step 3: Model**

In [19]:
# bitsandbytes settings passed to AutoModelForCausalLM.from_pretrained below.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # load the weights in 4-bit
    bnb_4bit_quant_type="nf4",             # 4-bit quantization type
    bnb_4bit_use_double_quant=True,        # enable double quantization
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for computation
)

**To get a Hugging Face access token, click [here](https://huggingface.co/settings/tokens).**

In [23]:
# Hub checkpoint used for both the tokenizer and the model weights.
MODEL_ID = "bigscience/bloomz-7b1"

# trust_remote_code is intentionally left at its default (off): it allows code
# shipped in the Hub repository to run locally, and BLOOM checkpoints are handled
# by the classes built into transformers, so it is not needed here.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# low_cpu_mem_usage and device_map both require the `accelerate` package (this is
# what the ImportError recorded under this cell is complaining about).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=quantization_config,
    low_cpu_mem_usage=True,
    device_map="auto",
)

ImportError: Using `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install accelerate`

In [None]:
# Text-generation pipeline around the quantized model.
# The completion length is bounded here, on the pipeline itself, so the limit
# applies however the pipeline ends up being called. max_new_tokens counts only
# generated tokens, so a long transcription in the prompt does not consume the
# budget (max_length, by contrast, includes the prompt tokens).
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  # output contains the prompt followed by the completion
    max_new_tokens=256,
)

In [None]:
# Wrap the transformers pipeline in LangChain's HuggingFacePipeline; the resulting
# `llm` object is what get_chatglm_response calls through `llm.invoke(...)`.
llm = HuggingFacePipeline(pipeline=pipe)

# **Step 4: Inference**

In [None]:
# Prompt sent for every transcription. ``{transcription}`` is the only template
# variable; the answer slots use square brackets, so they are left untouched.
_PROMPT_TEMPLATE = (
    "This is a patient's transcription: '{transcription}' \n\n "
    "Please use this format as the answer: "
    "The patient age is [Your extracted answer]. "
    "The patient will get [Your extracted answer] as treatment. "
    "Note: If the treatment has already been administered, it's also considered as the treatment, "
    "the format will be revised slightly: The patient got [Your extracted answer] as treatment. "
    "If there is no specific neither age or treatment , you can return unknown, unknown. "
    "If there is no specific age but treatment, you can return unknown, "
    "The patient will get [Your extracted answer] as treatment. "
    "There may be not only one treatment"
)


def get_chatglm_response(transcription):
    """Ask the module-level ``llm`` for the patient's age and treatment.

    The transcription is handed to the prompt template as a variable *value*
    instead of being spliced into the template text first. Any ``{`` or ``}``
    in the patient text is therefore passed through literally rather than being
    parsed as a template placeholder (which made ``format()`` raise).

    Despite the function name, the ``llm`` built in this notebook wraps the
    bigscience/bloomz-7b1 pipeline.

    Args:
        transcription: Text of one patient transcription.

    Returns:
        Whatever ``llm.invoke`` returns for the filled-in prompt.
    """
    prompt = PromptTemplate.from_template(_PROMPT_TEMPLATE)
    filled_prompt = prompt.format(transcription=transcription)
    # NOTE(review): whether extra invoke() keyword arguments such as max_length
    # reach the underlying transformers pipeline depends on the installed
    # LangChain version — confirm before relying on this limit.
    response = llm.invoke(filled_prompt, max_length=500)
    return response

In [None]:
# Run the extraction on the first two transcriptions only and show the raw replies.
res = [get_chatglm_response(text) for text in tqdm(transcription_list[0:2])]
print(res)