In [1]:
# Run this in the first cell
!pip install transformers torch scikit-learn pandas accelerate -q

In [2]:
from google.colab import files
uploaded = files.upload()

Saving medical_diagnostic_data.csv to medical_diagnostic_data.csv


In [3]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback
)
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
import warnings
warnings.filterwarnings('ignore')

In [4]:
print("Loading dataset...")
df = pd.read_csv('medical_diagnostic_data.csv')

# Clean and prepare symptoms text
df['symptoms_text'] = df['symptoms'].str.replace(';', ', ').str.replace('_', ' ')

# Prepare disease labels (single-label classification)
print("\nPreparing disease labels...")
label_encoder = LabelEncoder()
df['disease_label'] = label_encoder.fit_transform(df['disease_name'])
num_diseases = len(label_encoder.classes_)

print(f"Total diseases: {num_diseases}")
print(f"Disease classes: {label_encoder.classes_}")

# Prepare test labels (multi-label classification)
print("\nPreparing test labels...")
df['tests_list'] = df['recommended_tests'].str.split(';')
mlb = MultiLabelBinarizer()
test_labels = mlb.fit_transform(df['tests_list'])
num_tests = len(mlb.classes_)

print(f"Total unique tests: {num_tests}")
print(f"Test classes: {mlb.classes_[:10]}...")  # Show first 10

# Split data
print("\nSplitting data...")
train_texts, val_texts, train_disease_labels, val_disease_labels, train_test_labels, val_test_labels = train_test_split(
    df['symptoms_text'].tolist(),
    df['disease_label'].tolist(),
    test_labels,
    test_size=0.2,
    random_state=42,
    stratify=df['disease_label']
)

print(f"Training samples: {len(train_texts)}")
print(f"Validation samples: {len(val_texts)}")

Loading dataset...

Preparing disease labels...
Total diseases: 20
Disease classes: ['Acute Myocardial Infarction' 'Anemia' 'Asthma' 'Celiac Disease'
 'Chronic Kidney Disease' 'Depression' 'Gastroesophageal Reflux Disease'
 'Hepatitis C' 'Hypertension' 'Hyperthyroidism' 'Hypothyroidism'
 'Migraine' 'Multiple Sclerosis' 'Osteoporosis' 'Pneumonia'
 'Rheumatoid Arthritis' 'Sleep Apnea' 'Tuberculosis' 'Type 2 Diabetes'
 'Urinary Tract Infection']

Preparing test labels...
Total unique tests: 94
Test classes: ['Albumin Test' 'Allergy Tests' 'Anti-CCP Antibodies' 'Arterial Blood Gas'
 'Barium Swallow' 'Biopsy' 'Blood Calcium Test'
 'Blood Pressure Monitoring' 'Blood Tests' 'Blood Urea Nitrogen']...

Splitting data...
Training samples: 1600
Validation samples: 400


In [5]:
print("\nLoading BioBERT tokenizer...")
model_name = "dmis-lab/biobert-v1.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenize all texts
print("Tokenizing data...")
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=128)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=128)


Loading BioBERT tokenizer...


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/462 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Tokenizing data...


In [6]:
class MedicalDataset(Dataset):
    def __init__(self, encodings, disease_labels, test_labels):
        self.encodings = encodings
        self.disease_labels = disease_labels
        self.test_labels = test_labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.disease_labels[idx], dtype=torch.long)
        # For multi-label, we'll handle tests separately
        item['test_labels'] = torch.tensor(self.test_labels[idx], dtype=torch.float)
        return item

    def __len__(self):
        return len(self.disease_labels)

train_dataset = MedicalDataset(train_encodings, train_disease_labels, train_test_labels)
val_dataset = MedicalDataset(val_encodings, val_disease_labels, val_test_labels)


In [8]:
!wandb disabled

W&B disabled.


In [11]:
import os
os.environ["WANDB_DISABLED"] = "true"

In [12]:
print("\n" + "="*80)
print("TRAINING DISEASE PREDICTION MODEL")
print("="*80)

# Load BioBERT model for disease classification
disease_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_diseases,
    problem_type="single_label_classification"
)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results_disease',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=100,
    eval_strategy="epoch",  # Changed from evaluation_strategy
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    learning_rate=2e-5,
    save_total_limit=2,
)

# Compute metrics for evaluation
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    accuracy = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average='weighted')
    return {
        'accuracy': accuracy,
        'f1': f1
    }

# Create Trainer
trainer = Trainer(
    model=disease_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

# Train the model
print("\nTraining disease prediction model...")
trainer.train()

# Evaluate
print("\nEvaluating disease model...")
eval_results = trainer.evaluate()
print(f"Disease Model - Validation Accuracy: {eval_results['eval_accuracy']:.4f}")
print(f"Disease Model - Validation F1: {eval_results['eval_f1']:.4f}")

# Save disease model
disease_model.save_pretrained('./disease_model')
tokenizer.save_pretrained('./disease_model')
print("\n‚úì Disease model saved to './disease_model'")


TRAINING DISEASE PREDICTION MODEL


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



Training disease prediction model...


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,2.9915,2.819419,0.1175,0.061361
2,2.5835,2.040845,0.8325,0.79955
3,1.5554,0.808959,0.985,0.984946
4,0.5615,0.215894,0.985,0.985134
5,0.1444,0.049536,0.9975,0.9975



Evaluating disease model...


Disease Model - Validation Accuracy: 0.9975
Disease Model - Validation F1: 0.9975

‚úì Disease model saved to './disease_model'


In [13]:
print("\n" + "="*80)
print("TRAINING TEST RECOMMENDATION MODEL")
print("="*80)

# Custom dataset for multi-label
class MultiLabelDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item

    def __len__(self):
        return len(self.labels)

train_test_dataset = MultiLabelDataset(train_encodings, train_test_labels)
val_test_dataset = MultiLabelDataset(val_encodings, val_test_labels)

# Load BioBERT model for test recommendation
test_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_tests,
    problem_type="multi_label_classification"
)

# Training arguments for multi-label
training_args_tests = TrainingArguments(
    output_dir='./results_tests',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs_tests',
    logging_steps=100,
    eval_strategy="epoch",  # Changed from evaluation_strategy
    save_strategy="epoch",
    load_best_model_at_end=True,
    learning_rate=2e-5,
    save_total_limit=2,
)

# Compute metrics for multi-label
def compute_metrics_multilabel(eval_pred):
    predictions, labels = eval_pred
    # Apply sigmoid and threshold
    predictions = torch.sigmoid(torch.tensor(predictions))
    predictions = (predictions > 0.5).float().numpy()

    # Calculate metrics
    accuracy = accuracy_score(labels, predictions)
    f1_micro = f1_score(labels, predictions, average='micro', zero_division=0)
    f1_macro = f1_score(labels, predictions, average='macro', zero_division=0)

    return {
        'accuracy': accuracy,
        'f1_micro': f1_micro,
        'f1_macro': f1_macro
    }

# Create Trainer for tests
trainer_tests = Trainer(
    model=test_model,
    args=training_args_tests,
    train_dataset=train_test_dataset,
    eval_dataset=val_test_dataset,
    compute_metrics=compute_metrics_multilabel,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

# Train the model
print("\nTraining test recommendation model...")
trainer_tests.train()

# Evaluate
print("\nEvaluating test model...")
eval_results_tests = trainer_tests.evaluate()
print(f"Test Model - Validation Accuracy: {eval_results_tests['eval_accuracy']:.4f}")
print(f"Test Model - Validation F1 (Micro): {eval_results_tests['eval_f1_micro']:.4f}")
print(f"Test Model - Validation F1 (Macro): {eval_results_tests['eval_f1_macro']:.4f}")

# Save test model
test_model.save_pretrained('./test_model')
tokenizer.save_pretrained('./test_model')
print("\n‚úì Test model saved to './test_model'")


TRAINING TEST RECOMMENDATION MODEL


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



Training test recommendation model...


Epoch,Training Loss,Validation Loss,Accuracy,F1 Micro,F1 Macro
1,0.6679,0.56569,0.0,0.065873,0.018651
2,0.4689,0.35745,0.0,0.0,0.0
3,0.2937,0.23156,0.0,0.0,0.0
4,0.2032,0.180062,0.0,0.0,0.0
5,0.1691,0.162923,0.0,0.0,0.0



Evaluating test model...


Test Model - Validation Accuracy: 0.0000
Test Model - Validation F1 (Micro): 0.0000
Test Model - Validation F1 (Macro): 0.0000

‚úì Test model saved to './test_model'


In [14]:
import pickle

# Save encoders
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(label_encoder, f)

with open('test_mlb.pkl', 'wb') as f:
    pickle.dump(mlb, f)

print("\n‚úì Label encoders saved")


‚úì Label encoders saved


In [15]:
print("\n" + "="*80)
print("CREATING PREDICTION PIPELINE")
print("="*80)

def predict_from_symptoms(symptom_text):
    """
    Predict disease and recommended tests from symptom description

    Args:
        symptom_text (str): Natural language description of symptoms

    Returns:
        dict: Contains predicted disease and recommended tests
    """
    # Tokenize input
    inputs = tokenizer(symptom_text, return_tensors="pt", truncation=True, padding=True, max_length=128)

    # Predict disease
    with torch.no_grad():
        disease_outputs = disease_model(**inputs)
        disease_logits = disease_outputs.logits
        disease_pred = torch.argmax(disease_logits, dim=1).item()
        disease_probs = torch.softmax(disease_logits, dim=1)[0]
        disease_confidence = disease_probs[disease_pred].item()

    predicted_disease = label_encoder.inverse_transform([disease_pred])[0]

    # Predict tests
    with torch.no_grad():
        test_outputs = test_model(**inputs)
        test_logits = test_outputs.logits
        test_probs = torch.sigmoid(test_logits)[0]
        test_preds = (test_probs > 0.5).int().numpy()

    predicted_tests = mlb.inverse_transform([test_preds])[0]

    # Get test confidence scores
    test_scores = {}
    for i, test in enumerate(mlb.classes_):
        if test_preds[i] == 1:
            test_scores[test] = test_probs[i].item()

    return {
        'disease': predicted_disease,
        'disease_confidence': disease_confidence,
        'recommended_tests': list(predicted_tests),
        'test_confidence_scores': test_scores
    }


CREATING PREDICTION PIPELINE


In [19]:
# Move both models to GPU explicitly
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
disease_model = disease_model.to(device)
test_model = test_model.to(device)

def predict_from_symptoms(symptom_text):
    """
    Predict disease and recommended tests from symptom description
    """
    # Tokenize input
    inputs = tokenizer(symptom_text, return_tensors="pt", truncation=True, padding=True, max_length=128)

    # Move inputs to GPU
    inputs = {key: val.to(device) for key, val in inputs.items()}

    # Predict disease
    with torch.no_grad():
        disease_outputs = disease_model(**inputs)
        disease_logits = disease_outputs.logits
        disease_pred = torch.argmax(disease_logits, dim=1).item()
        disease_probs = torch.softmax(disease_logits, dim=1)[0]
        disease_confidence = disease_probs[disease_pred].item()

    predicted_disease = label_encoder.inverse_transform([disease_pred])[0]

    # Predict tests
    with torch.no_grad():
        test_outputs = test_model(**inputs)
        test_logits = test_outputs.logits
        test_probs = torch.sigmoid(test_logits)[0]
        test_preds = (test_probs > 0.5).int().cpu().numpy()  # Move to CPU for numpy

    # FIX: Reshape to 2D array for inverse_transform
    predicted_tests = mlb.inverse_transform(test_preds.reshape(1, -1))[0]

    # Get test confidence scores
    test_scores = {}
    for i, test in enumerate(mlb.classes_):
        if test_preds[i] == 1:
            test_scores[test] = test_probs[i].item()

    return {
        'disease': predicted_disease,
        'disease_confidence': disease_confidence,
        'recommended_tests': list(predicted_tests),
        'test_confidence_scores': test_scores
    }

print(f"‚úì Models loaded on {device}")
print("‚úì Prediction function ready")

# Now test it
print("\nTesting the model with example inputs:")
print("="*80)

test_examples = [
    "I have severe chest pain and shortness of breath",
    "I feel very tired and thirsty all the time, and I urinate frequently",
    "I have a persistent cough with fever and chest pain",
    "My joints are painful and stiff, especially in the morning"
]

for example in test_examples:
    print(f"\nInput: '{example}'")
    result = predict_from_symptoms(example)
    print(f"Predicted Disease: {result['disease']} (confidence: {result['disease_confidence']:.2%})")
    print(f"Recommended Tests: {', '.join(result['recommended_tests'])}")
    print("-" * 80)

print("\n" + "="*80)
print("‚úì All predictions completed successfully!")

‚úì Models loaded on cuda
‚úì Prediction function ready

Testing the model with example inputs:

Input: 'I have severe chest pain and shortness of breath'
Predicted Disease: Hypertension (confidence: 52.91%)
Recommended Tests: 
--------------------------------------------------------------------------------

Input: 'I feel very tired and thirsty all the time, and I urinate frequently'
Predicted Disease: Type 2 Diabetes (confidence: 76.43%)
Recommended Tests: 
--------------------------------------------------------------------------------

Input: 'I have a persistent cough with fever and chest pain'
Predicted Disease: Tuberculosis (confidence: 85.52%)
Recommended Tests: 
--------------------------------------------------------------------------------

Input: 'My joints are painful and stiff, especially in the morning'
Predicted Disease: Chronic Kidney Disease (confidence: 14.70%)
Recommended Tests: 
--------------------------------------------------------------------------------

‚úì A

In [20]:
import torch.nn as nn

print("="*80)
print("RETRAINING TEST RECOMMENDATION MODEL WITH FIXES")
print("="*80)

# Custom dataset for multi-label
class MultiLabelDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item

    def __len__(self):
        return len(self.labels)

train_test_dataset = MultiLabelDataset(train_encodings, train_test_labels)
val_test_dataset = MultiLabelDataset(val_encodings, val_test_labels)

# Load fresh BioBERT model for test recommendation
test_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_tests,
    problem_type="multi_label_classification"
)

# Custom Trainer with weighted loss for class imbalance
class WeightedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits

        # Use BCEWithLogitsLoss with pos_weight for class imbalance
        # Calculate positive class weights (inverse frequency)
        pos_weight = torch.tensor([(len(labels) / labels.sum(dim=0)[i].item() if labels.sum(dim=0)[i] > 0 else 1.0)
                                   for i in range(labels.shape[1])]).to(labels.device)
        pos_weight = torch.clamp(pos_weight, max=10.0)  # Cap at 10x weight

        loss_fct = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
        loss = loss_fct(logits, labels)

        return (loss, outputs) if return_outputs else loss

# Updated training arguments
training_args_tests = TrainingArguments(
    output_dir='./results_tests_v2',
    num_train_epochs=10,  # More epochs
    per_device_train_batch_size=8,  # Smaller batch size
    per_device_eval_batch_size=8,
    warmup_steps=200,
    weight_decay=0.01,
    logging_dir='./logs_tests_v2',
    logging_steps=50,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    learning_rate=3e-5,  # Slightly higher learning rate
    save_total_limit=2,
    metric_for_best_model="f1_micro",
)

# Compute metrics for multi-label (with lower threshold)
def compute_metrics_multilabel_v2(eval_pred):
    predictions, labels = eval_pred
    # Apply sigmoid and use lower threshold
    predictions = torch.sigmoid(torch.tensor(predictions))
    predictions_05 = (predictions > 0.5).float().numpy()
    predictions_03 = (predictions > 0.3).float().numpy()

    # Calculate metrics for both thresholds
    f1_micro_05 = f1_score(labels, predictions_05, average='micro', zero_division=0)
    f1_micro_03 = f1_score(labels, predictions_03, average='micro', zero_division=0)

    # Count predictions
    num_preds_05 = predictions_05.sum()
    num_preds_03 = predictions_03.sum()

    return {
        'f1_micro': f1_micro_03,  # Use 0.3 threshold for evaluation
        'f1_micro_05': f1_micro_05,
        'num_predictions_05': num_preds_05,
        'num_predictions_03': num_preds_03,
    }

# Create Weighted Trainer for tests
trainer_tests = WeightedTrainer(
    model=test_model,
    args=training_args_tests,
    train_dataset=train_test_dataset,
    eval_dataset=val_test_dataset,
    compute_metrics=compute_metrics_multilabel_v2,
)

# Train the model
print("\nRetraining test recommendation model with class balancing...")
print("This will take about 10-15 minutes...\n")
trainer_tests.train()

# Evaluate
print("\nEvaluating improved test model...")
eval_results_tests = trainer_tests.evaluate()
print(f"\nTest Model V2 Results:")
print(f"  F1 (Micro) @ 0.3 threshold: {eval_results_tests['eval_f1_micro']:.4f}")
print(f"  F1 (Micro) @ 0.5 threshold: {eval_results_tests['eval_f1_micro_05']:.4f}")
print(f"  Predictions @ 0.3 threshold: {eval_results_tests['eval_num_predictions_03']:.0f}")
print(f"  Predictions @ 0.5 threshold: {eval_results_tests['eval_num_predictions_05']:.0f}")

# Save improved test model
test_model.save_pretrained('./test_model_v2')
print("\n‚úì Improved test model saved to './test_model_v2'")

# Update global test_model
test_model = test_model.to(device)
print("‚úì Model loaded to GPU and ready for predictions")

RETRAINING TEST RECOMMENDATION MODEL WITH FIXES


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



Retraining test recommendation model with class balancing...
This will take about 10-15 minutes...



Epoch,Training Loss,Validation Loss,F1 Micro,F1 Micro 05,Num Predictions 05,Num Predictions 03
1,0.5836,0.549689,0.210145,0.201481,585.0,9048.0
2,0.3758,0.353505,0.447443,0.633362,2043.0,4934.0
3,0.2803,0.265802,0.581348,0.688732,2669.0,3514.0
4,0.2322,0.223304,0.629508,0.699681,2639.0,3135.0
5,0.2059,0.195674,0.662678,0.707389,2620.0,2906.0
6,0.1858,0.179031,0.67701,0.708661,2624.0,2814.0
7,0.1731,0.168504,0.690316,0.708549,2619.0,2732.0
8,0.1644,0.162946,0.695652,0.708261,2615.0,2700.0
9,0.1633,0.158442,0.69683,0.70858,2616.0,2693.0
10,0.1574,0.157154,0.697505,0.709279,2612.0,2689.0



Evaluating improved test model...



Test Model V2 Results:
  F1 (Micro) @ 0.3 threshold: 0.6975
  F1 (Micro) @ 0.5 threshold: 0.7093
  Predictions @ 0.3 threshold: 2689
  Predictions @ 0.5 threshold: 2612

‚úì Improved test model saved to './test_model_v2'
‚úì Model loaded to GPU and ready for predictions


In [21]:
# Update the prediction function to use the new model
def predict_from_symptoms(symptom_text, threshold=0.3):
    """
    Predict disease and recommended tests from symptom description
    """
    # Tokenize input
    inputs = tokenizer(symptom_text, return_tensors="pt", truncation=True, padding=True, max_length=128)

    # Move inputs to GPU
    inputs = {key: val.to(device) for key, val in inputs.items()}

    # Predict disease
    with torch.no_grad():
        disease_outputs = disease_model(**inputs)
        disease_logits = disease_outputs.logits
        disease_pred = torch.argmax(disease_logits, dim=1).item()
        disease_probs = torch.softmax(disease_logits, dim=1)[0]
        disease_confidence = disease_probs[disease_pred].item()

    predicted_disease = label_encoder.inverse_transform([disease_pred])[0]

    # Predict tests with the NEW model
    with torch.no_grad():
        test_outputs = test_model(**inputs)
        test_logits = test_outputs.logits
        test_probs = torch.sigmoid(test_logits)[0]
        test_preds = (test_probs > threshold).int().cpu().numpy()

    predicted_tests = mlb.inverse_transform(test_preds.reshape(1, -1))[0]

    # Get test confidence scores
    test_scores = {}
    for i, test in enumerate(mlb.classes_):
        if test_preds[i] == 1:
            test_scores[test] = test_probs[i].item()

    # If no tests predicted, get top 5 by probability
    if len(predicted_tests) == 0:
        top_k = 5
        top_probs, top_indices = torch.topk(test_probs, k=top_k)
        predicted_tests = [mlb.classes_[idx] for idx in top_indices.cpu().numpy()]
        test_scores = {mlb.classes_[idx]: prob.item() for idx, prob in zip(top_indices.cpu().numpy(), top_probs)}
        print(f"  ‚ö†Ô∏è No tests passed threshold {threshold}, showing top {top_k}")

    return {
        'disease': predicted_disease,
        'disease_confidence': disease_confidence,
        'recommended_tests': list(predicted_tests),
        'test_confidence_scores': test_scores
    }

# Test the improved model
print("\n" + "="*80)
print("TESTING IMPROVED MODEL")
print("="*80)

test_examples = [
    "I have severe chest pain and shortness of breath",
    "I feel very tired and thirsty all the time, and I urinate frequently",
    "I have a persistent cough with fever and chest pain",
    "My joints are painful and stiff, especially in the morning",
    "I have blood in my urine and pain when urinating",
    "I feel dizzy and have frequent headaches with vision problems",
]

for i, example in enumerate(test_examples, 1):
    print(f"\n[Test {i}] Input: '{example}'")
    print("-" * 80)
    result = predict_from_symptoms(example, threshold=0.3)
    print(f"üîç Predicted Disease: {result['disease']}")
    print(f"   Confidence: {result['disease_confidence']:.1%}")
    print(f"\nüíâ Recommended Tests ({len(result['recommended_tests'])}):")

    # Sort tests by confidence
    sorted_tests = sorted(result['test_confidence_scores'].items(),
                         key=lambda x: x[1], reverse=True)

    for test, score in sorted_tests[:8]:  # Show top 8 tests
        print(f"   ‚Ä¢ {test}: {score:.1%}")

    print("=" * 80)

print("\n‚úÖ Testing complete!")


TESTING IMPROVED MODEL

[Test 1] Input: 'I have severe chest pain and shortness of breath'
--------------------------------------------------------------------------------
üîç Predicted Disease: Hypertension
   Confidence: 52.9%

üíâ Recommended Tests (11):
   ‚Ä¢ ECG: 86.9%
   ‚Ä¢ Echocardiogram: 83.1%
   ‚Ä¢ Blood Tests: 75.8%
   ‚Ä¢ Kidney Function Test: 67.0%
   ‚Ä¢ Blood Pressure Monitoring: 62.7%
   ‚Ä¢ Cholesterol Test: 62.6%
   ‚Ä¢ Urinalysis: 60.2%
   ‚Ä¢ Coronary Angiography: 48.1%

[Test 2] Input: 'I feel very tired and thirsty all the time, and I urinate frequently'
--------------------------------------------------------------------------------
üîç Predicted Disease: Type 2 Diabetes
   Confidence: 76.4%

üíâ Recommended Tests (7):
   ‚Ä¢ Urinalysis: 82.6%
   ‚Ä¢ Kidney Ultrasound: 53.8%
   ‚Ä¢ Lipid Profile: 36.5%
   ‚Ä¢ Urine Culture: 36.0%
   ‚Ä¢ Cystoscopy: 35.8%
   ‚Ä¢ Complete Blood Count: 34.8%
   ‚Ä¢ Random Blood Sugar Test: 30.3%

[Test 3] Input: 'I have a per

In [22]:
from google.colab import files
import os

print("="*80)
print("DOWNLOADING MODEL FILES TO YOUR COMPUTER")
print("="*80)

# Check what's already saved
print("\nüìÇ Checking saved files...")

saved_dirs = []
if os.path.exists('./disease_model'):
    saved_dirs.append('disease_model')
if os.path.exists('./disease_model_final'):
    saved_dirs.append('disease_model_final')
if os.path.exists('./test_model_v2'):
    saved_dirs.append('test_model_v2')
if os.path.exists('./test_model_final'):
    saved_dirs.append('test_model_final')

print(f"Found directories: {saved_dirs}")

# Use the best available models
disease_model_dir = './disease_model_final' if os.path.exists('./disease_model_final') else './disease_model'
test_model_dir = './test_model_final' if os.path.exists('./test_model_final') else './test_model_v2'

print(f"\nUsing:")
print(f"  Disease Model: {disease_model_dir}")
print(f"  Test Model: {test_model_dir}")

# Create the inference script
print("\n1Ô∏è‚É£ Creating inference script...")
inference_script = f'''import torch
import pickle
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load models and encoders
print("Loading models...")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizer = AutoTokenizer.from_pretrained('./disease_model')
disease_model = AutoModelForSequenceClassification.from_pretrained('./disease_model').to(device)
test_model = AutoModelForSequenceClassification.from_pretrained('./test_model').to(device)

with open('label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

with open('test_mlb.pkl', 'rb') as f:
    mlb = pickle.load(f)

print("Models loaded successfully!")

def predict_from_symptoms(symptom_text, threshold=0.3):
    """Predict disease and recommended tests from symptom description"""
    inputs = tokenizer(symptom_text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    inputs = {{key: val.to(device) for key, val in inputs.items()}}

    with torch.no_grad():
        disease_outputs = disease_model(**inputs)
        disease_logits = disease_outputs.logits
        disease_pred = torch.argmax(disease_logits, dim=1).item()
        disease_probs = torch.softmax(disease_logits, dim=1)[0]
        disease_confidence = disease_probs[disease_pred].item()

    predicted_disease = label_encoder.inverse_transform([disease_pred])[0]

    with torch.no_grad():
        test_outputs = test_model(**inputs)
        test_logits = test_outputs.logits
        test_probs = torch.sigmoid(test_logits)[0]
        test_preds = (test_probs > threshold).int().cpu().numpy()

    predicted_tests = mlb.inverse_transform(test_preds.reshape(1, -1))[0]

    test_scores = {{}}
    for i, test in enumerate(mlb.classes_):
        if test_preds[i] == 1:
            test_scores[test] = test_probs[i].item()

    if len(predicted_tests) == 0:
        top_k = 5
        top_probs, top_indices = torch.topk(test_probs, k=top_k)
        predicted_tests = [mlb.classes_[idx] for idx in top_indices.cpu().numpy()]
        test_scores = {{mlb.classes_[idx]: prob.item() for idx, prob in zip(top_indices.cpu().numpy(), top_probs)}}

    return {{
        'disease': predicted_disease,
        'disease_confidence': disease_confidence,
        'recommended_tests': list(predicted_tests),
        'test_confidence_scores': test_scores
    }}

if __name__ == "__main__":
    examples = [
        "I have severe chest pain and shortness of breath",
        "I feel very tired and thirsty all the time",
    ]

    for symptom in examples:
        print(f"\\nSymptoms: {{symptom}}")
        result = predict_from_symptoms(symptom)
        print(f"Disease: {{result['disease']}} ({{result['disease_confidence']:.1%}})")
        print(f"Tests: {{', '.join(result['recommended_tests'][:5])}}")
'''

with open('predict.py', 'w') as f:
    f.write(inference_script)
print("   ‚úì Created: predict.py")

# Create README
print("\n2Ô∏è‚É£ Creating README...")
readme = '''# Medical Diagnosis Model

## Quick Start

### 1. Install Dependencies
```bash
pip install transformers torch scikit-learn
```

### 2. Run Predictions
```bash
python predict.py
```

### 3. Use in Your Code
```python
from predict import predict_from_symptoms

result = predict_from_symptoms("I have chest pain and difficulty breathing")
print(f"Disease: {result['disease']}")
print(f"Tests: {result['recommended_tests']}")
```

## Files:
- disease_model/ - Disease prediction model
- test_model/ - Test recommendation model
- label_encoder.pkl - Disease encoder
- test_mlb.pkl - Test encoder
- predict.py - Inference script
'''

with open('README.md', 'w') as f:
    f.write(readme)
print("   ‚úì Created: README.md")

# Zip everything
print("\n3Ô∏è‚É£ Creating zip file...")
print("   This may take 2-3 minutes...")

# Rename directories for cleaner package
!cp -r {disease_model_dir} disease_model 2>/dev/null || :
!cp -r {test_model_dir} test_model 2>/dev/null || :

!zip -r -q medical_diagnosis_model.zip disease_model/ test_model/ label_encoder.pkl test_mlb.pkl predict.py README.md

zip_size = os.path.getsize('medical_diagnosis_model.zip') / (1024*1024)
print(f"   ‚úì Zip created: {zip_size:.1f} MB")

# Download
print("\n4Ô∏è‚É£ Starting download...")
print("   ‚¨áÔ∏è Your browser will prompt you to save the file...")
files.download('medical_diagnosis_model.zip')

print("\n" + "="*80)
print("‚úÖ DOWNLOAD COMPLETE!")
print("="*80)
print("\nüì¶ Extract the zip file on your computer")
print("üìÅ You'll get these folders:")
print("   ‚Ä¢ disease_model/")
print("   ‚Ä¢ test_model/")
print("   ‚Ä¢ label_encoder.pkl")
print("   ‚Ä¢ test_mlb.pkl")
print("   ‚Ä¢ predict.py")
print("   ‚Ä¢ README.md")
print("\n‚ñ∂Ô∏è  Then run: python predict.py")

DOWNLOADING MODEL FILES TO YOUR COMPUTER

üìÇ Checking saved files...
Found directories: ['disease_model', 'test_model_v2']

Using:
  Disease Model: ./disease_model
  Test Model: ./test_model_v2

1Ô∏è‚É£ Creating inference script...
   ‚úì Created: predict.py

2Ô∏è‚É£ Creating README...
   ‚úì Created: README.md

3Ô∏è‚É£ Creating zip file...
   This may take 2-3 minutes...
   ‚úì Zip created: 1532.0 MB

4Ô∏è‚É£ Starting download...
   ‚¨áÔ∏è Your browser will prompt you to save the file...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


‚úÖ DOWNLOAD COMPLETE!

üì¶ Extract the zip file on your computer
üìÅ You'll get these folders:
   ‚Ä¢ disease_model/
   ‚Ä¢ test_model/
   ‚Ä¢ label_encoder.pkl
   ‚Ä¢ test_mlb.pkl
   ‚Ä¢ predict.py
   ‚Ä¢ README.md

‚ñ∂Ô∏è  Then run: python predict.py


In [23]:
from google.colab import files
import os

print("="*80)
print("DOWNLOADING ESSENTIAL FILES ONLY")
print("="*80)

# List of files you need
essential_files = [
    'disease_model',
    'test_model_v2',  # This is your improved test model
    'label_encoder.pkl',
    'test_mlb.pkl',
    'predict.py',
    'README.md'
]

# Check what exists
print("\nüìÇ Checking files...")
for item in essential_files:
    exists = "‚úì" if os.path.exists(item) else "‚úó"
    print(f"  {exists} {item}")

# Create a smaller zip with only essentials
print("\nüì¶ Creating optimized zip file...")
print("   (This will be smaller and faster to download)")

# Create the zip
!zip -r -q medical_model_essential.zip \
    disease_model/ \
    test_model_v2/ \
    label_encoder.pkl \
    test_mlb.pkl \
    predict.py \
    README.md

# Check size
zip_size = os.path.getsize('medical_model_essential.zip') / (1024*1024)
print(f"   ‚úì Zip created: {zip_size:.1f} MB")

# Download
print("\n‚¨áÔ∏è  Starting download...")
files.download('medical_model_essential.zip')

print("\n‚úÖ DOWNLOAD COMPLETE!")
print("="*80)

DOWNLOADING ESSENTIAL FILES ONLY

üìÇ Checking files...
  ‚úì disease_model
  ‚úì test_model_v2
  ‚úì label_encoder.pkl
  ‚úì test_mlb.pkl
  ‚úì predict.py
  ‚úì README.md

üì¶ Creating optimized zip file...
   (This will be smaller and faster to download)
   ‚úì Zip created: 1148.9 MB

‚¨áÔ∏è  Starting download...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


‚úÖ DOWNLOAD COMPLETE!


In [24]:
from google.colab import drive
import shutil

# Mount Google Drive
print("Mounting Google Drive...")
drive.mount('/content/drive')

# Create folder in Drive
drive_folder = '/content/drive/MyDrive/medical_diagnosis_model'
!mkdir -p "{drive_folder}"

print(f"\nüìÅ Copying files to: {drive_folder}")

# Copy files to Drive
print("\n1Ô∏è‚É£ Copying disease_model...")
!cp -r disease_model "{drive_folder}/"
print("   ‚úì Done")

print("\n2Ô∏è‚É£ Copying test_model_v2...")
!cp -r test_model_v2 "{drive_folder}/"
print("   ‚úì Done")

print("\n3Ô∏è‚É£ Copying pickle files...")
!cp label_encoder.pkl test_mlb.pkl "{drive_folder}/"
print("   ‚úì Done")

print("\n4Ô∏è‚É£ Copying scripts...")
!cp predict.py README.md "{drive_folder}/"
print("   ‚úì Done")

print("\n" + "="*80)
print("‚úÖ ALL FILES COPIED TO GOOGLE DRIVE!")
print("="*80)
print(f"\nüìÇ Location: Google Drive > medical_diagnosis_model/")
print("\nüí° Now you can:")
print("1. Open Google Drive in your browser")
print("2. Right-click the 'medical_diagnosis_model' folder")
print("3. Select 'Download'")
print("4. Google Drive will zip it for you")

Mounting Google Drive...
Mounted at /content/drive

üìÅ Copying files to: /content/drive/MyDrive/medical_diagnosis_model

1Ô∏è‚É£ Copying disease_model...
   ‚úì Done

2Ô∏è‚É£ Copying test_model_v2...
   ‚úì Done

3Ô∏è‚É£ Copying pickle files...
   ‚úì Done

4Ô∏è‚É£ Copying scripts...
   ‚úì Done

‚úÖ ALL FILES COPIED TO GOOGLE DRIVE!

üìÇ Location: Google Drive > medical_diagnosis_model/

üí° Now you can:
1. Open Google Drive in your browser
2. Right-click the 'medical_diagnosis_model' folder
3. Select 'Download'
4. Google Drive will zip it for you
