In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_selection import SelectKBest, chi2
import joblib

# Load the preprocessed symptom dataset
df = pd.read_csv('../preprocessed_symptoms.csv')

# Data Preparation
# Every column from position 7 onwards is used as a symptom feature.
# NOTE(review): the original comment says these are the columns after
# 'Symptom_Count' -- confirm that 'Symptom_Count' really is column index 6.
symptom_columns = df.columns[7:]
X = df[symptom_columns]
y = df['Disease']

# Encode the target variable (disease names -> integer class ids)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split BEFORE feature selection so the held-out rows cannot influence which
# features are chosen (fitting the selector on all rows leaks test data into
# the model and inflates the reported scores).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

# Feature Selection - keep the 30 features with the highest chi2 score,
# scored on the training split only
selector = SelectKBest(chi2, k=30)
X_train = selector.fit_transform(X_train_raw, y_train)
X_test = selector.transform(X_test_raw)

# Reduced view of the full dataset, kept so later cells that still refer to
# X_selected keep working
X_selected = selector.transform(X)

# Names of the columns the selector kept
selected_features = X.columns[selector.get_support()]

# Train the SVC model (probability=True enables predict_proba at inference)
svc_model = SVC(kernel='linear', C=1.0, probability=True, random_state=42)
svc_model.fit(X_train, y_train)

# Evaluate the model on the held-out split
y_pred = svc_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(
    y_test,
    y_pred,
    # Pass every encoded label explicitly: without `labels`, the report raises
    # when a class is missing from the test split because `target_names`
    # would no longer line up with the labels actually present.
    labels=np.arange(len(label_encoder.classes_)),
    target_names=label_encoder.classes_,
    # Classes that are never predicted get precision 0 without a warning.
    zero_division=0,
))

# Save the trained components so inference can reload them later
joblib.dump(svc_model, 'disease_svc_model.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')
joblib.dump(selector, 'feature_selector.pkl')
joblib.dump(selected_features, 'selected_features.pkl')

# Load the medicine mapping file.
# Forward slashes are used because '\d' and '\m' are invalid escape sequences
# in a normal string literal (Python warns about them) and a backslash path
# only resolves on Windows; '/' works on every platform.
medicine_mapping = pd.read_csv('../data/medications.csv')

# Look up the medications listed for a predicted disease
def recommend_medicine(disease):
    """
    Return the medications mapped to a disease.

    Rows of the module-level ``medicine_mapping`` table whose 'Disease'
    value equals ``disease`` exactly are selected, and their 'Medication'
    values are returned in table order.

    Args:
        disease (str): The name of the predicted disease.

    Returns:
        list: The matching medications, or a single-item list advising the
        user to consult a doctor when no row matches.
    """
    is_match = medicine_mapping['Disease'] == disease
    found = medicine_mapping.loc[is_match, 'Medication'].tolist()
    if not found:
        return ["Consult a doctor for proper medication"]
    return found

# Predict a disease from reported symptoms and attach medicine suggestions
def predict_disease_and_medicine(symptoms):
    """
    Predict the disease and recommend medicines based on input symptoms.

    The trained model, label encoder and feature selector are reloaded from
    the joblib files written by the training step in the current working
    directory. Symptom names that are not columns of the module-level
    ``symptom_columns`` are ignored.

    Args:
        symptoms (dict): Dictionary of symptoms where keys are symptom names
                         and values are 1 (present) or 0 (absent).

    Returns:
        dict: Dictionary with three keys:
            'predicted_disease': the disease name returned by the model,
            'probabilities': disease name -> probability, ordered from most
                to least likely,
            'recommended_medicines': result of ``recommend_medicine`` for the
                predicted disease.
    """
    # Load the saved components.
    # NOTE: joblib files are pickles -- only load artifacts from a trusted
    # location.
    model = joblib.load('disease_svc_model.pkl')
    encoder = joblib.load('label_encoder.pkl')
    selector = joblib.load('feature_selector.pkl')

    # Start from an all-zero row covering every symptom column used in training
    feature_vector = pd.DataFrame(np.zeros((1, len(symptom_columns))),
                                  columns=symptom_columns)

    # Copy the provided symptom values into the row
    for symptom, value in symptoms.items():
        if symptom in feature_vector.columns:
            feature_vector[symptom] = value

    # Reduce the row to the same features used in training
    selected_features_vector = selector.transform(feature_vector)

    # Make prediction
    prediction = model.predict(selected_features_vector)
    probabilities = model.predict_proba(selected_features_vector)[0]

    disease = encoder.inverse_transform(prediction)[0]

    # The columns of predict_proba follow model.classes_ (the encoded labels
    # the model actually saw during fit), so decode those labels rather than
    # indexing encoder.classes_ by position; the two only coincide when every
    # disease is present in the training split.
    class_names = encoder.inverse_transform(model.classes_)

    # Sort diseases by probability, highest first (ties keep class order)
    sorted_probs = sorted(zip(class_names, probabilities),
                          key=lambda item: item[1], reverse=True)

    # Recommend medicines for the predicted disease
    medicines = recommend_medicine(disease)

    return {
        'predicted_disease': disease,
        'probabilities': dict(sorted_probs),
        'recommended_medicines': medicines
    }

# Example usage with user input
if __name__ == "__main__":
    print("Welcome to the Disease Prediction and Medicine Recommendation System!")
    print("Please enter your symptoms below. Type 'done' when you are finished.")

    # Collect symptoms from the user until they type 'done'
    symptoms = {}
    while True:
        symptom = input("Enter a symptom (e.g., itching, skin_rash): ").strip().lower()
        if symptom == 'done':
            break
        if symptom in symptom_columns:
            symptoms[symptom] = 1
            print(f"Symptom '{symptom}' added.")
        else:
            print(f"Symptom '{symptom}' not recognized. Please try again.")

    if not symptoms:
        # Nothing to predict from. Falling through to the end of the block
        # replaces the former exit() call: exit() is injected by the `site`
        # module for interactive use, is not guaranteed to exist, and
        # interferes with the kernel when run inside a notebook.
        print("No symptoms entered. Exiting...")
    else:
        # Predict disease and recommend medicines
        result = predict_disease_and_medicine(symptoms)

        # Display results
        print("\nPrediction Result:")
        print(f"Predicted Disease: {result['predicted_disease']}")
        print("Probabilities:")
        for disease, prob in result['probabilities'].items():
            print(f"{disease}: {prob:.4f}")
        print("\nRecommended Medicines:")
        for medicine in result['recommended_medicines']:
            print(medicine)

  medicine_mapping = pd.read_csv('..\data\medications.csv')
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model Accuracy: 0.43

Classification Report:
                                         precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       0.00      0.00      0.00        18
                                   AIDS       0.00      0.00      0.00        30
                                   Acne       0.00      0.00      0.00        24
                    Alcoholic hepatitis       1.00      0.96      0.98        25
                                Allergy       0.00      0.00      0.00        24
                              Arthritis       1.00      1.00      1.00        23
                       Bronchial Asthma       0.00      0.00      0.00        33
                   Cervical spondylosis       1.00      0.87      0.93        23
                            Chicken pox       0.00      0.00      0.00        21
                    Chronic cholestasis       0.03      1.00      0.05        15
                            Common Cold       0.00      0.00   