In [None]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.impute import SimpleImputer
import pickle

def _without_unnamed_columns(frame):
    """Return `frame` restricted to the columns whose name does not start with 'Unnamed'."""
    keep_mask = ~frame.columns.str.contains('^Unnamed')
    return frame.loc[:, keep_mask]


# Read the pre-split train/test CSVs, discarding any 'Unnamed...' columns
# (e.g. leftover index columns) straight away.
train_df = _without_unnamed_columns(pd.read_csv('Training.csv'))
test_df = _without_unnamed_columns(pd.read_csv('Testing.csv'))

# 'prognosis' is the target column; every remaining column is treated as a feature.
X_train, y_train = train_df.drop(columns='prognosis'), train_df['prognosis']
X_test, y_test = test_df.drop(columns='prognosis'), test_df['prognosis']

# Fill missing feature values with per-column means. The means are learned from
# the training split only and then reused for the test split.
imputer = SimpleImputer(strategy='mean').fit(X_train)
X_train_imputed = imputer.transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Map prognosis strings to integer class ids. The mapping is learned from the
# training labels, so a test label never seen in training makes transform() raise.
le = LabelEncoder().fit(y_train)
y_train_encoded = le.transform(y_train)
y_test_encoded = le.transform(y_test)

# Train a linear-kernel SVM.
# probability=True enables predict_proba(); scikit-learn fits those probability
# estimates with an internal, randomly shuffled cross-validation, so random_state
# is pinned to make the reported confidences reproducible across runs.
svm_model = SVC(kernel='linear', probability=True, random_state=42)
svm_model.fit(X_train_imputed, y_train_encoded)

# Predict on test set
y_pred = svm_model.predict(X_test_imputed)

# Evaluate the model.
# Passing the full list of encoded class ids keeps the report rows and the
# confusion-matrix axes aligned with le.classes_ even when the test split does
# not contain every class (without `labels`, classification_report raises when
# the number of observed classes differs from len(target_names)).
class_ids = list(range(len(le.classes_)))
print("✅ Accuracy:", accuracy_score(y_test_encoded, y_pred))
print(
    "\n📊 Classification Report:\n",
    classification_report(
        y_test_encoded,
        y_pred,
        labels=class_ids,
        target_names=le.classes_,
        zero_division=0,  # classes with no predicted samples score 0 without emitting a warning
    ),
)
print("🧾 Confusion Matrix:\n", confusion_matrix(y_test_encoded, y_pred, labels=class_ids))

# Persist the fitted classifier and its label encoder with pickle so that
# predictions can be made later without retraining.
for artifact_path, artifact in (
    ('svm_disease_model.sav', svm_model),
    ('svm_label_encoder.sav', le),
):
    with open(artifact_path, 'wb') as sink:
        pickle.dump(artifact, sink)


✅ Accuracy: 1.0

📊 Classification Report:
                                          precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00         1
                                   AIDS       1.00      1.00      1.00         1
                                   Acne       1.00      1.00      1.00         1
                    Alcoholic hepatitis       1.00      1.00      1.00         1
                                Allergy       1.00      1.00      1.00         1
                              Arthritis       1.00      1.00      1.00         1
                       Bronchial Asthma       1.00      1.00      1.00         1
                   Cervical spondylosis       1.00      1.00      1.00         1
                            Chicken pox       1.00      1.00      1.00         1
                    Chronic cholestasis       1.00      1.00      1.00         1
                            Common Cold       1.00      1.00     

In [None]:
# Sanity check: (number of rows, number of feature columns) of the training feature frame.
X_train.shape

(4920, 132)

In [None]:
import pandas as pd
import numpy as np
import pickle

def _load_pickle(path):
    """Deserialize and return the object stored at `path`.

    NOTE: pickle.load can execute arbitrary code embedded in the file, so only
    use this on artifacts you produced yourself.
    """
    with open(path, 'rb') as source:
        return pickle.load(source)


# Restore the classifier and label encoder written by the training cell.
svm_model = _load_pickle('svm_disease_model.sav')
le = _load_pickle('svm_label_encoder.sav')

# Re-read the training CSV to recover the feature column names: every column
# except 'Unnamed...' ones and the 'prognosis' target.
df = pd.read_csv('Training.csv')
is_unnamed = df.columns.str.contains('^Unnamed')
df = df.loc[:, ~is_unnamed]
feature_columns = df.columns.drop('prognosis')

# Start from an all-zero symptom row with one column per training feature.
input_data = pd.DataFrame(np.zeros((1, len(feature_columns))), columns=feature_columns)

# List of user symptoms
user_symptoms = [
    'itching', 'skin_rash', 'shivering',
    'chills', 'joint_pain', 'stomach_pain', 'acidity'
]

# Report symptoms that have no matching feature column instead of silently
# dropping them: a typo would otherwise change the prediction without any hint.
unknown_symptoms = [symptom for symptom in user_symptoms if symptom not in input_data.columns]
if unknown_symptoms:
    print("Warning: ignoring unrecognised symptoms:", unknown_symptoms)

# Set the recognised symptoms to 1
for symptom in user_symptoms:
    if symptom in input_data.columns:
        input_data.at[0, symptom] = 1

# Predict disease.
# The model was fitted on the plain array returned by the imputer (no feature
# names), so pass a plain array here as well; handing it a DataFrame makes
# scikit-learn warn about a feature-name mismatch.
pred_encoded = svm_model.predict(input_data.to_numpy())[0]
predicted_disease = le.inverse_transform([pred_encoded])[0]

print("🩺 Predicted Disease:", predicted_disease)





🩺 Predicted Disease: Drug Reaction




In [None]:
# Example symptoms
input_symptoms = ['itching', 'skin_rash', 'shivering', 'chills', 'joint_pain', 'stomach_pain', 'acidity']

# Clean input: trim, lowercase and use underscores
input_symptoms = [s.strip().lower().replace(' ', '_') for s in input_symptoms]

# Prepare input vector in training-column order.
# Uses `feature_columns` from the model-loading cell; the previous reference to
# `X` was never defined anywhere in the notebook and raised NameError on a
# fresh kernel.
symptom_set = set(input_symptoms)  # set gives O(1) membership checks per column
input_vector = [1 if col in symptom_set else 0 for col in feature_columns]

# Surface symptoms that match no feature column rather than ignoring them silently.
unmatched_symptoms = sorted(symptom_set.difference(feature_columns))
if unmatched_symptoms:
    print("Warning: ignoring unrecognised symptoms:", unmatched_symptoms)

# Predict probability.
# NOTE: SVC probabilities come from a separate calibration step, so the top
# probability is not guaranteed to agree with svm_model.predict().
probs = svm_model.predict_proba([input_vector])[0]

# Get top N predictions (highest probability first)
top_n = 3
top_indices = np.argsort(probs)[-top_n:][::-1]
# predict_proba columns follow svm_model.classes_, so translate each column
# index to its encoded label before decoding it to a disease name.
top_diseases = [
    (le.inverse_transform([svm_model.classes_[i]])[0], probs[i])
    for i in top_indices
    if probs[i] > 0
]

# Output result
print("\n Top possible disease(s) based on symptoms:")
for disease, prob in top_diseases:
    print(f" - {disease} (confidence: {prob:.2f})")



🧠 Top possible disease(s) based on symptoms:
 - Drug Reaction (confidence: 0.32)
 - Allergy (confidence: 0.16)
 - Fungal infection (confidence: 0.14)
