In [None]:
## Pre train Model

In [5]:
#### Healthcare case studies


import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load pre-trained GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Function to generate answer for a patient question
def generate_answer(question):
    # Encode the question
    input_ids = tokenizer.encode(question, return_tensors="pt")

    # Generate answer using GPT-2 model
    with torch.no_grad():
        output = model.generate(input_ids, max_length=100, num_return_sequences=1, temperature=0.7)
    
    # Decode the generated answer
    answer = tokenizer.decode(output[0], skip_special_tokens=True)
    
    return answer

# Example usage
question = "What are the symptoms of COVID-19?"
answer = generate_answer(question)
print("Answer:", answer)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Answer: What are the symptoms of COVID-19?

COVID-19 is a common cold. It is caused by a virus that causes a cold. It is usually caused by a virus that causes a cold. It is usually caused by a virus that causes a cold. It is usually caused by a virus that causes a cold. It is usually caused by a virus that causes a cold. It is usually caused by a virus that causes a cold. It is usually caused by a virus that


In [None]:
## RAG

In [8]:
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import torch

# Sample patient question-answer pairs
patient_data = [
    {"question": "What are the symptoms of COVID-19?", "answer": "Common symptoms of COVID-19 include fever, cough, and shortness of breath."},
    {"question": "How is diabetes diagnosed?", "answer": "Diabetes is diagnosed through tests such as fasting blood sugar, oral glucose tolerance test, etc."},
    {"question": "What treatments are available for asthma?", "answer": "Treatments for asthma include bronchodilators, inhaled corticosteroids, etc."},
    # Add more patient questions and corresponding answers
]

# Load pre-trained Sentence Transformer model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
df = pd.DataFrame(patient_data)

# Embed the answers
answer_embeddings = model.encode(df['answer'], convert_to_tensor=True)

def calculate_similarity(query, embeddings):
    query_embedding = model.encode(query, convert_to_tensor=True)
    cos_scores = util.pytorch_cos_sim(query_embedding, embeddings)[0]
    return cos_scores

def input_text_model(input_text):
    input_sentence_processed = ''.join([i for i in input_text if not i.isdigit()])
    query = input_sentence_processed
    query_similarity = calculate_similarity(query, answer_embeddings)
    top_n = 1
    top_indices = query_similarity.argsort().cpu().numpy()[-top_n:]
    similar_answers = df.loc[top_indices, 'answer'].tolist()
    return similar_answers[-1]

# Example usage
patient_question = "symptoms of covid?"
answer = input_text_model(patient_question)
print("Patient Question:", patient_question)
print("Generated Answer:", answer)


Patient Question: symptoms of covid?
Generated Answer: Common symptoms of COVID-19 include fever, cough, and shortness of breath.
