In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load dataset
file_path = "medical_history_data.csv"  # Path to the dataset
df = pd.read_csv(file_path)

# Step 1: Preprocess the Text Data
X = df['description'].str.lower()  # Convert to lowercase for consistency
y = df['diagnosis']

# Step 2: Label Encode the Diagnoses
# factorize() numbers diagnoses in order of first appearance; calling it again
# on df['diagnosis'] reproduces the same code -> name lookup for decoding.
y = y.factorize()[0]  # Factorize diagnoses to get numeric labels

# Step 3: Split Data
# Split the raw text BEFORE vectorizing so held-out rows cannot influence the
# TF-IDF vocabulary or IDF weights (fitting on the full corpus leaks test-set
# statistics into training). test_size and random_state are unchanged, so the
# same rows are expected to land in the test set as before.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 4: Vectorize Symptoms using TF-IDF
tfidf = TfidfVectorizer(max_features=500)
X_train = tfidf.fit_transform(X_train_text)  # learn vocabulary/IDF from training rows only
X_test = tfidf.transform(X_test_text)        # reuse the fitted vocabulary; never refit here
# Full-corpus matrix under the train-fitted vocabulary, kept so any code that
# still reads X_tfidf continues to work.
X_tfidf = tfidf.transform(X)

# Step 5: Train the Model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Step 6: Make Predictions and Evaluate
# NOTE(review): the recorded run reports perfect accuracy on every class; worth
# confirming the dataset has no identical descriptions shared between the train
# and test rows, which would also inflate this score.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))


Model Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        22
           2       1.00      1.00      1.00        19
           3       1.00      1.00      1.00        19
           4       1.00      1.00      1.00        19
           5       1.00      1.00      1.00        19
           6       1.00      1.00      1.00        18
           7       1.00      1.00      1.00        25
           8       1.00      1.00      1.00        27
           9       1.00      1.00      1.00        13

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



In [7]:
# Free-text symptom descriptions to classify with the already-trained model
new_symptoms = [
    "Patient experiences persistent headache and dizziness.",
    "High fever, chills, muscle aches, and fatigue for a few days.",
    "Burning sensation while urinating, accompanied by lower abdominal discomfort.",
    "Severe lower back pain, especially when bending or lifting.",
    "Sharp chest pain during physical activity, with occasional shortness of breath.",
]

# Project the texts into the feature space of the fitted vectorizer
# (transform only: the vocabulary must stay the one the model was trained on)
new_symptoms_tfidf = tfidf.transform(new_symptoms)

# Numeric class codes predicted by the classifier
new_predictions = model.predict(new_symptoms_tfidf)

# Rebuild the code -> diagnosis-name lookup from the same factorize() call
# that was used to encode the training labels
diagnosis_names = df['diagnosis'].factorize()[1]
diagnosis_mapping = {code: name for code, name in enumerate(diagnosis_names)}
predicted_diagnoses = [diagnosis_mapping[label] for label in new_predictions]

# Report each input next to its decoded prediction
for symptom, diagnosis in zip(new_symptoms, predicted_diagnoses):
    print(f"Symptom: {symptom}\nPredicted Diagnosis: {diagnosis}\n")

Symptom: Patient experiences persistent headache and dizziness.
Predicted Diagnosis: Hypertension

Symptom: High fever, chills, muscle aches, and fatigue for a few days.
Predicted Diagnosis: Influenza

Symptom: Burning sensation while urinating, accompanied by lower abdominal discomfort.
Predicted Diagnosis: Urinary tract infection

Symptom: Severe lower back pain, especially when bending or lifting.
Predicted Diagnosis: Muscle strain

Symptom: Sharp chest pain during physical activity, with occasional shortness of breath.
Predicted Diagnosis: Possible angina



In [8]:
# One free-text symptom description to classify
single_symptom = "High fever, chills, muscle aches, and fatigue for a few days."

# The vectorizer expects an iterable of documents, hence the one-element list
single_symptom_tfidf = tfidf.transform([single_symptom])

# predict() returns an array with one class code per input row
single_prediction = model.predict(single_symptom_tfidf)

# Decode the class code using the same factorize() ordering as the training labels
diagnosis_names = df['diagnosis'].factorize()[1]
diagnosis_mapping = dict(enumerate(diagnosis_names))
predicted_code = single_prediction[0]
predicted_diagnosis = diagnosis_mapping[predicted_code]

# Show the input alongside its decoded diagnosis
print(f"Symptom: {single_symptom}")
print(f"Predicted Diagnosis: {predicted_diagnosis}")


Symptom: High fever, chills, muscle aches, and fatigue for a few days.
Predicted Diagnosis: Influenza
