In [44]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle  # Import pickle for saving models


In [53]:

# Read the training table from disk into a DataFrame
dataset = pd.read_csv(filepath_or_buffer='dataset/Training.csv')


In [54]:
# Target column first, then every remaining column as the feature matrix
y = dataset['prognosis']
X = dataset.drop(columns=['prognosis'])


In [55]:

# Fit a LabelEncoder on the prognosis column and keep the encoded labels in Y;
# `le` is reused later to map encoded predictions back to disease names.
le = LabelEncoder()
Y = le.fit_transform(y=y)

In [56]:
# Persist the fitted LabelEncoder so that encoded predictions can be mapped
# back to disease names later. The context manager guarantees the file handle
# is flushed and closed, even if pickling raises.
with open('label_encoder.pkl', 'wb') as encoder_file:
    pickle.dump(le, encoder_file)

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=20)

print("Dataset loaded and preprocessed successfully!")
print(dataset.head(5))


Dataset loaded and preprocessed successfully!
   itching  skin_rash  nodal_skin_eruptions  continuous_sneezing  shivering  \
0        1          1                     1                    0          0   
1        0          1                     1                    0          0   
2        1          0                     1                    0          0   
3        1          1                     0                    0          0   
4        1          1                     1                    0          0   

   chills  joint_pain  stomach_pain  acidity  ulcers_on_tongue  ...  \
0       0           0             0        0                 0  ...   
1       0           0             0        0                 0  ...   
2       0           0             0        0                 0  ...   
3       0           0             0        0                 0  ...   
4       0           0             0        0                 0  ...   

   blackheads  scurring  skin_peeling  silver_like_d

In [51]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix


In [52]:
# Candidate classifiers, keyed by the label used in the printed report
models = dict(
    SVC=SVC(kernel='linear'),
    RandomForest=RandomForestClassifier(n_estimators=100, random_state=42),
    GradientBoosting=GradientBoostingClassifier(n_estimators=100, random_state=42),
    KNeighbors=KNeighborsClassifier(n_neighbors=5),
    MultinomialNB=MultinomialNB(),
)

# Fit every candidate on the training split and report its accuracy and
# confusion matrix on the held-out split.
# NOTE(review): the recorded run shows accuracy 1.0 for every model; worth
# checking whether identical rows end up in both splits -- confirm.
for model_name, model in models.items():
    # fit() returns the estimator itself, so the prediction can be chained
    predictions = model.fit(X_train, y_train).predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    cm = confusion_matrix(y_test, predictions)

    print(
        f"{model_name} Accuracy: {accuracy}",
        f"{model_name} Confusion Matrix:",
        cm,
        sep="\n",
    )

SVC Accuracy: 1.0
SVC Confusion Matrix:
[[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]
RandomForest Accuracy: 1.0
RandomForest Confusion Matrix:
[[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]
GradientBoosting Accuracy: 1.0
GradientBoosting Confusion Matrix:
[[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]
KNeighbors Accuracy: 1.0
KNeighbors Confusion Matrix:
[[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]
MultinomialNB Accuracy: 1.0
MultinomialNB Confusion Matrix:
[[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34 

In [57]:
# Fit a fresh linear-kernel SVC on the training split; this is the model that
# gets pickled in the next cell.
svc = SVC(kernel='linear')
svc.fit(X=X_train, y=y_train)


In [58]:
# Save the trained model using pickle. The context manager closes the file
# handle deterministically, so the pickle is fully flushed to disk before the
# success message is printed.
with open('svc.pkl', 'wb') as model_file:
    pickle.dump(svc, model_file)

print("SVC model saved successfully!")

SVC model saved successfully!


In [59]:
# Load the saved SVC model, closing the file handle as soon as it is read.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that come from a trusted source (here: the file written by the
# previous cell).
with open('svc.pkl', 'rb') as model_file:
    svc = pickle.load(model_file)

In [60]:
# Make a prediction for a single test case.
# Selecting with a list (iloc[[0]]) yields a one-row DataFrame, so the column
# names the model was fitted with are kept and scikit-learn does not warn
# about missing feature names (which the former .values.reshape(1, -1) caused).
predicted_disease = svc.predict(X_test.iloc[[0]])
# y_test comes from the same train_test_split call as X_test and is a plain
# array, so position 0 is the label of the row predicted above.
actual_disease = y_test[0]




In [65]:
# Translate both encoded labels back to disease names in a single call
predicted_disease_decoded, actual_disease_decoded = le.inverse_transform(
    [predicted_disease[0], actual_disease]
)

print(
    f"Predicted Disease: {predicted_disease_decoded}",
    f"Actual Disease: {actual_disease_decoded}",
    sep="\n",
)

# Supplementary lookup tables; the recommendation helper below reads
# description, precautions, medications, diets and workout.
sym_des = pd.read_csv("dataset/symtoms_df.csv")
precautions = pd.read_csv("dataset/precautions_df.csv")
workout = pd.read_csv("dataset/workout_df.csv")
description = pd.read_csv("dataset/description.csv")
medications = pd.read_csv('dataset/medications.csv')
diets = pd.read_csv("dataset/diets.csv")

Predicted Disease: hepatitis A
Actual Disease: hepatitis A


In [66]:
# Helper function to get disease-related recommendations
def helper(dis):
    """Collect the recommendation fields stored for one disease.

    Looks ``dis`` up in the module-level ``description``, ``precautions``,
    ``medications``, ``diets`` and ``workout`` DataFrames and substitutes a
    fixed placeholder for every table that has no matching row.

    Parameters
    ----------
    dis
        Value compared for equality against each table's disease column
        (``'Disease'``, or ``'disease'`` for the workout table).

    Returns
    -------
    tuple
        ``(desc, pre, medication, diet, workout_plan)``. ``pre`` is the
        flattened array of the four precaution columns for all matching rows,
        or a list of four placeholder strings; every other element is the
        value from the first matching row, or a placeholder string.
    """
    def first_match(table, key_column, value_column, fallback):
        # Value of `value_column` in the first row whose key equals `dis`
        rows = table[table[key_column] == dis]
        return fallback if rows.empty else rows[value_column].values[0]

    desc = first_match(description, 'Disease', 'Description',
                       "Description not available.")

    # Precautions span four columns, so they are handled separately
    precaution_rows = precautions[precautions['Disease'] == dis]
    if precaution_rows.empty:
        pre = ["No precautions available."] * 4  # one placeholder per column
    else:
        precaution_columns = ['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']
        pre = precaution_rows[precaution_columns].values.flatten()

    medication = first_match(medications, 'Disease', 'Medication',
                             "Medication not available.")
    diet = first_match(diets, 'Disease', 'Diet',
                       "Diet information not available.")
    # The workout table uses lower-case column names
    workout_plan = first_match(workout, 'disease', 'workout',
                               "Workout plan not available.")

    return desc, pre, medication, diet, workout_plan

# Example: Getting recommendations for a predicted disease
predicted_disease_name = predicted_disease_decoded  # Use the disease predicted earlier
# Unpack into `precaution_list`, not `precautions`: helper() reads the global
# `precautions` DataFrame, so rebinding that name to the returned values would
# make every later helper() call (or a re-run of this cell) fail.
desc, precaution_list, medication, diet, workout_plan = helper(predicted_disease_name)

# Print the recommendations
print(f"\nRecommendations for {predicted_disease_name}:")
print(f"Description: {desc}")
print(f"Precautions: {precaution_list}")
print(f"Medication: {medication}")
print(f"Diet: {diet}")
print(f"Workout Plan: {workout_plan}")



Recommendations for hepatitis A:
Description: hepatitis A is a viral liver disease.
Precautions: ['Consult nearest hospital' 'wash hands through' 'avoid fatty spicy food'
 'medication']
Medication: ['Vaccination', 'Antiviral drugs', 'IV fluids', 'Blood transfusions', 'Liver transplant']
Diet: ['Hepatitis A Diet', 'High-Calorie Diet', 'Soft and bland foods', 'Hydration', 'Protein-rich foods']
Workout Plan: Stay hydrated
