In [None]:
!pip install -U bitsandbytes

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub id of the fine-tuned checkpoint used for this one-shot demo.
model_name = "Arthur-77/QWEN2.5-1.5B-medical-finetuned"

# Load tokenizer and weights; "auto" leaves dtype and device placement to the library.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

# Build a two-turn conversation: the system instruction plus the patient's opening line.
prompt = "Good Morning Doctor I am facing weakness from a couple of weeks."
system_turn = {"role": "system", "content": "You are a doctor. A patient has visited you, conversate with the patient and gradually reach to conclusion"}
user_turn = {"role": "user", "content": prompt}
messages = [system_turn, user_turn]

# Render the conversation to a prompt string (not token ids) that ends with the
# assistant generation prompt, then tokenize it as a batch of one on the model's device.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# generate() returns prompt + continuation for every batch row.
full_sequences = model.generate(**model_inputs, max_new_tokens=512)

# Drop the leading prompt tokens from each row so only the new tokens remain.
generated_ids = [
    sequence[len(prompt_ids):]
    for prompt_ids, sequence in zip(model_inputs.input_ids, full_sequences)
]

# Decode the single batch entry into the reply text.
decoded_replies = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
response = decoded_replies[0]


In [None]:
# Display the reply text that the preceding cell stored in `response`.
print(response)


In [None]:
import re

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

def load_model(destination_folder):
    """Load the tokenizer and a 4-bit quantized causal LM from a checkpoint.

    Args:
        destination_folder: Checkpoint identifier handed unchanged to both
            ``from_pretrained`` calls (``main`` passes a Hub repo id).

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(destination_folder)

    # Pin the special-token ids unconditionally, overriding whatever the
    # checkpoint's tokenizer config declares.
    # NOTE(review): these look like Qwen2.5's <|endoftext|> (pad) and
    # <|im_end|> (eos) ids -- confirm against the tokenizer's vocabulary.
    tokenizer.pad_token_id = 151643
    tokenizer.eos_token_id = 151645

    model = AutoModelForCausalLM.from_pretrained(
        destination_folder,
        # Passing `load_in_4bit=True` directly to from_pretrained is
        # deprecated; an explicit quantization config is the supported form.
        quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        device_map="auto",
        torch_dtype=torch.float16,  # dtype for the non-quantized modules
    )
    return tokenizer, model

def generate_response(tokenizer, model, messages):
    """Generate the assistant's next reply for the conversation so far.

    Args:
        tokenizer: Tokenizer providing ``apply_chat_template``,
            ``batch_decode`` and ``eos_token_id``.
        model: Causal LM providing ``generate`` and ``device``.
        messages: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts; it is read, not modified.

    Returns:
        The text of the newly generated reply only: the prompt tokens are
        sliced off before decoding and special tokens are skipped.
    """
    # return_dict=True makes the tokenizer hand back input_ids together with
    # its own attention_mask, so no mask has to be reconstructed by comparing
    # against the pad id (which would also mask genuine occurrences of it).
    inputs = tokenizer.apply_chat_template(
        messages,
        truncation=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)  # follow the model's placement instead of assuming "cuda"

    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,  # explicit pad id for generation
    )

    # generate() returns prompt + continuation; keep only the continuation so
    # the caller does not get the system/user turns echoed back in the reply.
    prompt_length = inputs["input_ids"].shape[1]
    reply_ids = outputs[:, prompt_length:]
    return tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]

# Phrases that signal the user wants to stop chatting.
_END_PHRASES = (
    "goodbye",
    "bye",
    "end conversation",
    "that's all",
    "thank you",
    "thanks",
)

# Match the phrases only as whole words/phrases, case-insensitively, so that
# words merely containing one of them (e.g. "Thanksgiving") do not end the chat.
_END_PHRASE_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(phrase) for phrase in _END_PHRASES) + r")\b",
    re.IGNORECASE,
)


def is_conversation_end(user_message):
    """Return True if the user's message contains a farewell phrase.

    Args:
        user_message: Raw text typed by the user.

    Returns:
        True when any end phrase occurs in the message as a whole word or
        phrase (case-insensitive), otherwise False.
    """
    return _END_PHRASE_RE.search(user_message) is not None

def main():
    """Run an interactive console chat between the user and the doctor bot.

    Loads the model once, then loops: read a line from the user, stop on a
    farewell phrase (or when input is closed/interrupted), otherwise generate
    a reply and append both turns to the running conversation history.
    """
    # Load the model
    destination_folder = "Arthur-77/QWEN2.5-1.5B-medical-finetuned"
    tokenizer, model = load_model(destination_folder)

    # Conversation history, seeded with the system instruction.
    messages = [
        {
            "role": "system",
            "content": "You are an experienced doctor. Diagnose the problem faced by the patient based on the symptoms provided by them. Ask for any additional inputs if required to diagnose the problem. If you are not sure say seek medical attention.",
        }
    ]

    print("Doctor Bot: Hello! How are you feeling today?")

    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the session was interrupted: leave politely
            # instead of surfacing a traceback.
            print("\nDoctor Bot: Goodbye! Take care of yourself.")
            break

        # Don't send an empty user turn to the model; just prompt again.
        if not user_input.strip():
            continue

        # Check for the end of the conversation before touching the history,
        # since no reply will be generated for a farewell.
        if is_conversation_end(user_input):
            print("Doctor Bot: Goodbye! Take care of yourself.")
            break

        messages.append({"role": "user", "content": user_input})

        bot_response = generate_response(tokenizer, model, messages)
        print("Doctor Bot:", bot_response)

        # Keep the bot's turn so the next generation sees the full dialogue.
        messages.append({"role": "assistant", "content": bot_response})

# Start the interactive chat only when this code runs as the main module
# (not when it is imported from elsewhere).
if __name__ == "__main__":
    main()