<a href="https://colab.research.google.com/github/AqueeqAzam/real-time-core-understanding-of-expert-system/blob/main/nlp_projects.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# `Disease diagnosis using expert system and nlp`

In [31]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

# Knowledge Base
kb = {
    "Fever": [True, False, True, False, True, False, True, False, True, False],
    "Cough": [True, True, False, True, True, False, False, True, True, False],
    "Headache": [False, True, True, False, True, True, False, True, False, False],
    "Fatigue": [True, False, False, True, True, False, True, True, False, True],
    "Disease": ["COVID-19", "Flu", "Common Cold", "Healthy", "COVID-19", "Common Cold", "COVID-19", "Flu", "COVID-19", "Healthy"]
}

# Inference Engine
def inference_engine():
    # Create a dataset
    data = []
    symptom_keys = list(kb.keys())  # Get a list of symptom keys
    symptom_keys.remove("Disease") # Remove the "Disease" key as it contains diagnoses, not symptoms
    for i in range(100):
        # Randomly select symptoms
        symptoms = np.random.choice(symptom_keys, size=3)
        # Randomly select a diagnosis
        diagnosis = np.random.choice(kb["Disease"])
        data.append({"symptoms": " ".join(symptoms), "diagnosis": diagnosis})

    # Split data into training and testing sets
    train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

    # Create TF-IDF vectorizer
    vectorizer = TfidfVectorizer()

    # Transform data
    X_train = vectorizer.fit_transform([x["symptoms"] for x in train_data])
    y_train = [x["diagnosis"] for x in train_data]
    X_test = vectorizer.transform([x["symptoms"] for x in test_data])
    y_test = [x["diagnosis"] for x in test_data]

    # Train Random Forest model
    model = RandomForestClassifier(n_estimators=100)
    model.fit(X_train, y_train)

    return model, vectorizer

# User Interface
def get_symptoms():
    symptoms = input("Enter your symptoms (comma-separated): ")
    return " ".join([symptom.strip() for symptom in symptoms.split(",")])

# Main function
def main():
    model, vectorizer = inference_engine()
    symptoms = get_symptoms()
    prediction = model.predict(vectorizer.transform([symptoms]))
    print("Prediction:", prediction[0])

if __name__ == "__main__":
    main()

Enter your symptoms (comma-separated): Fever, Headache
Prediction: COVID-19
