In [1]:
!pip install transformers



In [1]:
import pandas as pd
from transformers import pipeline

# Colab-only: mount Google Drive at /content/gdrive so files stored on the
# Drive can be read through the local filesystem.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
!pip install tqdm



In [5]:
from tqdm import tqdm

In [15]:
# Location of the disease/description table on the mounted Google Drive.
DATA_PATH = '/content/gdrive/MyDrive/AI final 2024/disease to description/disease to description-1.csv'

# The file is decoded as ISO-8859-1 (Latin-1).
data = pd.read_csv(DATA_PATH, encoding="ISO-8859-1")

# Free-text descriptions, one entry per CSV row.
descriptions = data['description'].tolist()

# Candidate labels for the classifier. Missing values are dropped and
# surrounding whitespace is stripped *before* de-duplicating, so that
# "Fever" and "Fever " collapse into a single label instead of competing
# with each other in the ranking.
diseases = (
    data['diseases']
    .dropna()
    .astype(str)
    .str.strip()
    .loc[lambda names: names != ""]
    .unique()
    .tolist()
)

# Zero-shot classification pipeline.
# NOTE(review): the recorded runs below took well over an hour per call;
# if a GPU runtime is available, consider passing `device=` here — confirm
# against the runtime actually used.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# predict function
def predict_disease(descriptions, labels, batch_size=8):
    """Score candidate disease labels for each description via zero-shot classification.

    Parameters
    ----------
    descriptions : str or iterable of str
        One description or several. A bare string is treated as a single
        description instead of being sliced into character chunks.
    labels : iterable
        Candidate labels. Surrounding whitespace is stripped; blank and
        non-string entries (e.g. NaN coming from a CSV column) are dropped;
        duplicates are removed while keeping first-seen order.
    batch_size : int, default 8
        Number of descriptions passed to the classifier per call.

    Returns
    -------
    list of dict
        One classifier result per description, in input order. Callers in
        this notebook read the ``'labels'`` and ``'scores'`` entries.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1, or if descriptions are given
        but no usable label remains after cleaning.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # A str is itself sliceable, so without this guard the batching below
    # would cut one description into `batch_size`-character fragments.
    if isinstance(descriptions, str):
        descriptions = [descriptions]
    else:
        descriptions = list(descriptions)

    stripped = (label.strip() for label in labels if isinstance(label, str))
    # dict.fromkeys de-duplicates while preserving order (a set would not).
    labels = list(dict.fromkeys(label for label in stripped if label))

    if not descriptions:
        return []
    if not labels:
        raise ValueError("labels must contain at least one non-empty string")

    results = []
    for start in tqdm(range(0, len(descriptions), batch_size), desc="Processing batches"):
        batch_results = classifier(
            descriptions[start:start + batch_size],
            candidate_labels=labels,
        )
        # The pipeline may hand back a single dict rather than a list when
        # it was given one sequence; extending with a dict would add its keys.
        if isinstance(batch_results, dict):
            batch_results = [batch_results]
        results.extend(batch_results)
    return results



# Raw test

In [None]:

# Small test: classify one free-text description against the candidate labels.
test_description = "fever and sore throat"
# Pass the description as a one-element list: predict_disease slices its
# input into batches of descriptions.
result_list = predict_disease([test_description], diseases)
print(' ')
for result in result_list:
    print(f"Description: {test_description}")
    print(f"Predicted diseases: {result['labels']}")
    print(f"Scores: {result['scores']}")

    # One line per candidate label, score shown as a percentage.
    for label, score in zip(result['labels'], result['scores']):
        print(f"Disease: {label}, Score: {score * 100:.4f}%")


Processing batches: 100%|██████████| 1/1 [00:03<00:00,  3.89s/it]

 
Description: fever and sore throat
Predicted diseases: ['Lymphocytic Choriomeningitis ', 'Marine Toxins ', 'Parasites - Trichuriasis ', 'Parasites - Cysticercosis', 'Parasites - Cysticercosis ']
Scores: [0.34865620732307434, 0.19034601747989655, 0.1726599782705307, 0.15773619711399078, 0.13060161471366882]
Disease: Lymphocytic Choriomeningitis , Score: 0.3487
Disease: Marine Toxins , Score: 0.1903
Disease: Parasites - Trichuriasis , Score: 0.1727
Disease: Parasites - Cysticercosis, Score: 0.1577
Disease: Parasites - Cysticercosis , Score: 0.1306





In [None]:

# Test against the full label set, printing only the leading entries.
test_description = "fever and sore throat"
top_k = 5  # how many leading labels to print

# Pass the description as a one-element list: predict_disease slices its
# input into batches of descriptions.
result_list = predict_disease([test_description], diseases)
print('\n')
for result in result_list:
    print(f"Description: {test_description}\n")

    # Print the first `top_k` label/score pairs, score shown as a percentage.
    leading = zip(result['labels'][:top_k], result['scores'][:top_k])
    for rank, (label, score) in enumerate(leading, start=1):
        print(f"Top{rank} Disease: {label}, Score: {score * 100:.2f}%")
print('\n')
print('If you experience any symptoms, please seek medical attention promptly.')

Processing batches: 100%|██████████| 1/1 [1:13:04<00:00, 4384.98s/it]



Description: fever and sore throat

Top1 Disease: Sore Throat , Score: 0.80%
Top2 Disease: Sore Throat, Score: 0.77%
Top3 Disease: Fever, Score: 0.64%
Top4 Disease: Fever , Score: 0.40%
Top5 Disease: Cough , Score: 0.38%


If you experience any symptoms, please seek medical attention promptly.





# Formal test

In [16]:

test_description = [
    "Recurring fevers, chills, and sweats caused by mosquito-borne parasites.",
    "Persistent cough, weight loss, night sweats, and fatigue due to bacterial infection."
]
top_k = 5  # how many leading labels to print per description

result_list = predict_disease(test_description, diseases)
print('\n')
for idx, result in enumerate(result_list):
    print('\n')
    print(f"Description: {test_description[idx]}\n")

    # Print the first `top_k` label/score pairs. `rank` is kept separate
    # from `idx` so the inner loop does not overwrite the outer loop index.
    leading = zip(result['labels'][:top_k], result['scores'][:top_k])
    for rank, (label, score) in enumerate(leading, start=1):
        print(f"Top{rank} Disease: {label}, Score: {score * 100:.2f}%")
    print('\n')
    print('If you experience any symptoms, please seek medical attention promptly.')
    print('____________________________________________________________________________________')

Processing batches: 100%|██████████| 1/1 [1:47:03<00:00, 6423.83s/it]





Description: Recurring fevers, chills, and sweats caused by mosquito-borne parasites.

Top1 Disease: Sweat, Score: 0.28%
Top2 Disease: Fever, Score: 0.22%
Top3 Disease: Parasitic Diseases, Score: 0.22%
Top4 Disease: common variable immune deficiency, Score: 0.19%
Top5 Disease: Dual Diagnosis, Score: 0.17%


If you experience any symptoms, please seek medical attention promptly.
____________________________________________________________________________________


Description: Persistent cough, weight loss, night sweats, and fatigue due to bacterial infection.

Top1 Disease: Bacterial Infections, Score: 1.28%
Top2 Disease: Breathing Problems, Score: 1.03%
Top3 Disease: Cough, Score: 0.91%
Top4 Disease: common variable immune deficiency, Score: 0.27%
Top5 Disease: Dual Diagnosis, Score: 0.25%


If you experience any symptoms, please seek medical attention promptly.
____________________________________________________________________________________





In [17]:

test_description = [
    "Recurring fevers, chills, and sweats caused by mosquito-borne parasites.",
    "Persistent cough, weight loss, night sweats, and fatigue due to bacterial infection."
]
top_k = 20  # how many leading labels to print per description

# NOTE(review): this runs the classifier on the same two descriptions as the
# top-5 cell; the recorded progress bars show each run taking about 1h46m.
# If that cell's `result_list` is still in the kernel it could be reused.
result_list = predict_disease(test_description, diseases)
print('\n')
for idx, result in enumerate(result_list):
    print('\n')
    print(f"Description: {test_description[idx]}\n")

    # Print the first `top_k` label/score pairs. `rank` is kept separate
    # from `idx` so the inner loop does not overwrite the outer loop index.
    leading = zip(result['labels'][:top_k], result['scores'][:top_k])
    for rank, (label, score) in enumerate(leading, start=1):
        print(f"Top{rank} Disease: {label}, Score: {score * 100:.2f}%")
    print('\n')
    print('If you experience any symptoms, please seek medical attention promptly.')
    print('____________________________________________________________________________________')

Processing batches: 100%|██████████| 1/1 [1:46:07<00:00, 6367.55s/it]





Description: Recurring fevers, chills, and sweats caused by mosquito-borne parasites.

Top1 Disease: Sweat, Score: 0.28%
Top2 Disease: Fever, Score: 0.22%
Top3 Disease: Parasitic Diseases, Score: 0.22%
Top4 Disease: common variable immune deficiency, Score: 0.19%
Top5 Disease: Dual Diagnosis, Score: 0.17%
Top6 Disease: multiminicore disease, Score: 0.14%
Top7 Disease: Coping with Chronic Illness, Score: 0.14%
Top8 Disease: Health Disparities, Score: 0.13%
Top9 Disease: Common variable immunodeficiency, Score: 0.13%
Top10 Disease: Evaluating Health Information, Score: 0.12%
Top11 Disease: Disseminated superficial actinic porokeratosis, Score: 0.12%
Top12 Disease: Acute intermittent porphyria, Score: 0.11%
Top13 Disease: Nevi flammei, familial multiple, Score: 0.11%
Top14 Disease: Carney complex, Score: 0.11%
Top15 Disease: Chronic atypical neutrophilic dermatosis with lipodystrophy and elevated temperature, Score: 0.11%
Top16 Disease: Immune System and Disorders, Score: 0.10%
Top17 


