In [4]:
import pandas as pd 
import numpy as np 
from tkinter import * 
from tkinter import ttk 
from sklearn.model_selection import train_test_split 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.naive_bayes import GaussianNB 
from sklearn.metrics import accuracy_score 
import matplotlib.pyplot as plt 

# --- Data Loading and Preprocessing ---
# Both tables are read from relative paths, so "Training_Data.csv" and
# "Testing_Data.csv" must be reachable from the current working directory.
df = pd.read_csv(filepath_or_buffer="Training_Data.csv")
df_test = pd.read_csv(filepath_or_buffer="Testing_Data.csv")

# Feature columns: every column except the final one.
# NOTE(review): assumes the label column "prognosis" is the last column of
# the training CSV -- confirm against the file layout.
symptoms = df.columns[:-1]

# Distinct labels found in the training table, in sorted order.
diseases = sorted(df["prognosis"].unique())

# Two-way lookup between a label and its integer class id; the ids follow
# the position of each label in `diseases`.
index_to_disease = dict(enumerate(diseases))
disease_to_index = {
    name: class_id for class_id, name in index_to_disease.items()
}

# Replace the labels with their class ids in both tables. Any label that is
# missing from the mapping becomes NaN (Series.map semantics).
df["prognosis"] = df["prognosis"].map(disease_to_index)
df_test["prognosis"] = df_test["prognosis"].map(disease_to_index)

# Feature matrix and target vector taken from the training table.
X, y = df[symptoms], df["prognosis"]

# Hold out 20% of the training rows for validation; the fixed random_state
# keeps the shuffle, and therefore the split, reproducible between runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Model Training ---
# Each estimator is created and then fitted on the training split. The two
# tree-based models share random_state=42 so repeated runs build the same
# trees; GaussianNB takes no seed.

# Decision tree
dt_clf = DecisionTreeClassifier(random_state=42)
dt_clf.fit(X_train, y_train)

# Random forest
rf_clf = RandomForestClassifier(random_state=42)
rf_clf.fit(X_train, y_train)

# Gaussian naive Bayes
nb_clf = GaussianNB()
nb_clf.fit(X_train, y_train)