<a href="https://colab.research.google.com/github/Gowthamabinav-VP/SDC_GENAI/blob/main/Medical_Diagnosis_Assistant.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install necessary libraries
!pip install transformers torch --quiet

# Import necessary libraries
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline
import torch

# Load BioBERT, a BERT encoder pre-trained on biomedical text.
# NOTE: this checkpoint does not include a trained classification head; the
# `classifier.weight` / `classifier.bias` parameters are newly initialized, so
# the predictions below are placeholders until the model is fine-tuned on
# labelled symptom -> disease data.
model_name = "dmis-lab/biobert-v1.1"
tokenizer = BertTokenizer.from_pretrained(model_name)
# Size the classification head for the three disease classes (IDs 0, 1, 2)
# that this notebook maps predictions to. With the library default of two
# labels, class 2 could never be predicted.
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=3)

# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Function to classify symptoms and suggest possible diseases
def diagnose(symptom_text):
    """Classify a free-text symptom description with the module-level model.

    Args:
        symptom_text: Symptom description. It is encoded with the module-level
            ``tokenizer`` and truncated to at most 512 tokens.

    Returns:
        A ``(class_id, score)`` tuple. ``class_id`` is the index of the largest
        logit; ``score`` is that raw logit value. It is not a probability and
        may be negative.
    """
    # Encode the text into a tensor of token IDs and move it next to the model
    token_ids = tokenizer.encode(
        symptom_text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    token_ids = token_ids.to(device)

    # Inference only, so gradient tracking is switched off
    with torch.no_grad():
        logits = model(token_ids).logits

    # Pick the highest-scoring class and read back its logit for the single
    # encoded sequence (row 0)
    best_class = torch.argmax(logits, dim=-1).item()
    best_logit = logits[0][best_class].item()
    return best_class, best_logit

# Sample symptom description; edit this text to try other inputs
symptom_text = """
Fever, dry cough, and tiredness. Difficulty breathing, chest pain.
"""

# Placeholder mapping from class ID to disease name. Replace it with the label
# set the classifier is actually trained on (append more names as needed).
disease_map = dict(enumerate(["COVID-19", "Flu", "Common Cold"]))

# Run the classifier on the sample input
disease_class, disease_score = diagnose(symptom_text)

# Report the raw prediction, then its human-readable name
print(f"Predicted Disease Class: {disease_class}")
print(f"Disease Prediction Score: {disease_score}")
print(f"Predicted Disease: {disease_map.get(disease_class, 'Unknown Disease')}")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m34.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m23.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/462 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/433M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

Predicted Disease Class: 1
Disease Prediction Score: -0.09530112892389297
Predicted Disease: Flu
