# **Mistral-7B-Instruct-v0.1**

# **Step 1: Libraries**

In [4]:
!pip install langchain accelerate BitsAndBytesConfig --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m731.7/731.7 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.6/410.6 MB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.6/121.6 MB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.5/56.5 MB[0m [31m27.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.2/124.2 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m196.0/196.0 MB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m166.0/166.0 MB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.1/99.1 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━

In [5]:
from tqdm import tqdm
import pandas as pd
import torch
import accelerate
from langchain import PromptTemplate, HuggingFacePipeline
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

# **Step 2: Load Data**

In [6]:
# Mount Google Drive into the Colab filesystem; the dataset is read from
# a folder under /content/drive/MyDrive in the next cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# Location of the mtsamples CSV inside the mounted Drive folder.
csv_path = '/content/drive/MyDrive/Medical_Transcription_Extraction/mtsamples.csv'
# Load the whole file into a DataFrame with pandas' default parsing options.
df = pd.read_csv(csv_path)

In [8]:
# Collect the free-text transcriptions that will be sent to the model.
# Empty cells are read by pandas as float NaN and would otherwise be rendered
# into the prompt as the literal text "nan", so rows without a transcription
# are dropped. The list therefore no longer lines up positionally with `df`.
transcription_list = df['transcription'].dropna().to_list()

# **Step 3: Model**

In [9]:
# bitsandbytes 4-bit quantization settings, passed to the model loader below.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # load the weights in 4-bit precision
    bnb_4bit_quant_type="nf4",             # use the NF4 4-bit data type
    bnb_4bit_use_double_quant=True,        # nested quantization of the quantization constants
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for computation
)

PackageNotFoundError: No package metadata was found for bitsandbytes

In [None]:
# Hub id of the checkpoint; the tokenizer and the weights are both loaded
# from this same repository.
checkpoint = "mistralai/Mistral-7B-Instruct-v0.1"

# NOTE(review): trust_remote_code=True allows code shipped in the Hub repo to
# run locally -- confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map="auto",                        # automatic device placement
    low_cpu_mem_usage=True,
    quantization_config=quantization_config,  # 4-bit settings defined earlier
    trust_remote_code=True,
)

In [None]:
# Start from the generation defaults stored with the checkpoint, then override
# the decoding options used in this notebook.
generation_config = GenerationConfig.from_pretrained('mistralai/Mistral-7B-Instruct-v0.1')
sampling_overrides = {
    "max_new_tokens": 1024,      # maximum number of new tokens the model may generate
    "temperature": 0.7,          # randomness of the generated text
    "top_p": 0.95,               # diversity of the generated text
    "do_sample": True,           # sample during generation
    "repetition_penalty": 1.15,  # how strongly repeated tokens are discouraged
}
# Applied in the order listed above, one attribute at a time.
for option, setting in sampling_overrides.items():
    setattr(generation_config, option, setting)

# Bundle the tokenizer, the quantized model and the generation settings into a
# single text-generation pipeline object.
pipe = pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model=model,
    generation_config=generation_config,
    return_full_text=True,  # ask for the prompt to be included in the returned text
)

In [None]:
# Wrap the transformers pipeline in LangChain's HuggingFacePipeline so it can
# be called through `llm.invoke(...)` in the inference step.
llm = HuggingFacePipeline(pipeline=pipe)

# **Step 4: Inference**

In [None]:
def get_chatglm_response(transcription):
    """Ask the LLM for a patient's age and a short treatment summary.

    The transcription is supplied to the prompt as a template *variable*
    instead of being spliced into the template text beforehand. Splicing it in
    first would make ``PromptTemplate.from_template`` parse the transcription
    itself, so any ``{`` or ``}`` in the medical text would be treated as a
    placeholder and formatting would fail.

    Args:
        transcription: Free-text medical transcription of a single patient.

    Returns:
        The value returned by ``llm.invoke`` for the filled-in prompt.
    """
    # Plain (non-f) string: ``{transcription}`` is the only placeholder and is
    # filled in by PromptTemplate below. The wording and indentation of the
    # prompt are kept exactly as they were sent to the model before.
    template = """This is a patient's transcription: '{transcription}'
    For the given patient's transcription, do the next actions sequentially:
    First, propose the patient's age. If there is no words about age, return unknown.
    Second, propose no more than ten words that summarize the treatment. If you don't know the treatment, assume '''unknown'''
    Note: don't use any other words.
    Then, show your response following the next format:
    Age: <age>
    Treatment: The patient will get <treatment> as treatment.
    """
    prompt = PromptTemplate.from_template(template)
    # NOTE(review): max_length=250 is kept from the original call; it looks at
    # odds with max_new_tokens=1024 set on the generation config -- confirm
    # whether the LangChain wrapper forwards it to the pipeline at all.
    response = llm.invoke(prompt.format(transcription=transcription), max_length=250)
    return response

In [None]:
# Try the prompt on the first two transcriptions only; tqdm shows progress
# while the responses are collected in order.
res = [get_chatglm_response(note) for note in tqdm(transcription_list[:2])]
print(res)