In [None]:
# Clone the model repository from the Hugging Face Hub into the current working directory.
# NOTE(review): the later cells load the model and tokenizer by Hub id (`model_name`),
# not from this local checkout, so this clone looks unused -- confirm it is still needed.
!git clone https://huggingface.co/ruslanmv/Medical-Llama3-8B

In [None]:
!pip install transformers bitsandbytes accelerate torch

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

In [None]:
# Hugging Face Hub repository id used for both the model and the tokenizer.
model_name = 'ruslanmv/Medical-Llama3-8B'

# Forwarded as `device_map` when the model is loaded.
device_map = "auto"

In [None]:
# Quantization settings handed to `from_pretrained` via `quantization_config`:
# 4-bit loading, "nf4" quantization type, float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type='nf4',
    load_in_4bit=True,
)

In [None]:
# Load the causal language model identified by `model_name`, quantized per `bnb_config`.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device_map,
    quantization_config=bnb_config,
    # The cache is disabled at load time; `askme` passes use_cache=True to `generate`.
    use_cache=False,
    # NOTE(review): permits code shipped in the model repository to run locally --
    # confirm the repository is trusted.
    trust_remote_code=True,
)

In [None]:
# Tokenizer that matches the model loaded from `model_name`.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)

# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Reuse the tokenizer created in the previous cell. Loading it a second time here would
# rebind `tokenizer` to a fresh object and silently drop the `pad_token` set there.

# Persona strings for the assistant.
# NOTE(review): nothing visible in this notebook reads `chat_templates` (plural); it is
# kept unchanged in case other code relies on it.
tokenizer.chat_templates = {
    "default": {
        "system": "You are an AI Medical Assistant trained to provide health advice. Please answer thoroughly.",
        "assistant": "AI: {content}"
    }
}

# `chat_template` has to be a Jinja template, not a template *name*: the literal string
# "default" would be rendered verbatim by `apply_chat_template`. This template mirrors
# the "User: ...\nAI:" prompt format used by `askme`; a system message (for example the
# "system" string above) is emitted on its own line when the caller supplies one.
tokenizer.chat_template = (
    "{% for message in messages %}"
    "{% if message['role'] == 'system' %}{{ message['content'] + '\n' }}"
    "{% elif message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n' }}"
    "{% elif message['role'] == 'assistant' %}{{ 'AI: ' + message['content'] + '\n' }}"
    "{% endif %}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ 'AI:' }}{% endif %}"
)

In [None]:
# Chronological list of {"role": ..., "content": ...} turns recorded by `askme`.
conversation_history = list()

In [None]:
def askme(question, max_new_tokens=50):
    """Ask the model one question and return its answer as plain text.

    The prompt is built from ``question`` alone, as ``"User: <question>\\nAI:"``.
    Turns already stored in ``conversation_history`` are recorded for inspection
    only; they are not fed back into the prompt.

    Args:
        question: The user's question. Must contain at least one
            non-whitespace character.
        max_new_tokens: Upper bound on the number of tokens generated for the
            answer. Defaults to 50.

    Returns:
        The generated answer with surrounding whitespace removed.

    Raises:
        ValueError: If ``question`` is ``None``, empty, or whitespace only.
    """
    if question is None or question.strip() == "":
        raise ValueError("The question must be defined before calling askme().")

    prompt = f"User: {question}\nAI:"

    # Place the inputs on whatever device the model lives on instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        use_cache=True,
        # Explicit padding id: avoids relying on the tokenizer having a pad token set.
        pad_token_id=tokenizer.eos_token_id,
    )

    # `generate` returns the prompt tokens followed by the completion. Decode only the
    # completion so an "AI:" inside the answer cannot be mistaken for the prompt marker.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # The model may keep writing a made-up follow-up turn; keep only its first reply.
    answer = generated_text.split("\nUser:", 1)[0].strip()

    # Record both turns only after generation succeeded, so a failure cannot leave a
    # question without an answer in the history.
    conversation_history.append({"role": "user", "content": question})
    conversation_history.append({"role": "assistant", "content": answer})

    return answer


In [None]:
def start_conversation():
    """Run an interactive question/answer loop on the console.

    Reads lines with ``input``, sends each non-empty one to ``askme`` and prints
    the reply. The loop ends when the user types ``exit`` (case-insensitive,
    surrounding whitespace ignored) or when input ends (EOF or interrupt).
    """
    print("Medical Assistant is ready. Type your question (type 'exit' to stop):\n")

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # No more input or the user interrupted the prompt: stop without a traceback.
            print("\nEnding conversation.")
            break

        if user_input.lower() == 'exit':
            print("Ending conversation.")
            break

        # `askme` raises ValueError on blank questions; just prompt again instead.
        if not user_input:
            continue

        response = askme(user_input)

        print(f"AI: {response}\n")

# Launch the interactive chat loop; it keeps prompting until the user types 'exit'.
start_conversation()