In [None]:
import pandas as pd
import pickle
import os
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [4]:
# CSV file that train_and_save_model() loads with pandas.
DATA_PATH = "disease.csv"
# Directory that receives the pickled artifacts; it is created on demand when saving.
MODEL_OUTPUT_DIR = "models"
# File name of the pickled classifier inside MODEL_OUTPUT_DIR.
MODEL_FILENAME = "health_model.pkl"

In [5]:
def _load_dataset(data_path):
    """Read the CSV at ``data_path``; return the DataFrame, or ``None`` after printing why loading failed."""
    try:
        df = pd.read_csv(data_path)
    except FileNotFoundError:
        print(f"❌ Dataset not found at {data_path}. Check file path.")
        return None
    except Exception as e:
        print(f"❌ Error loading data: {str(e)}")
        return None

    print(f"✅ Data loaded: {len(df)} rows, {len(df.columns)} columns")
    print(f"Original columns: {df.columns.tolist()}")
    return df


def _split_features_target(df, target_column):
    """Normalise the column names of ``df`` (in place) and split it into features and target.

    Returns ``(X, y)``, or ``None`` after printing a message when ``target_column``
    is not among the normalised column names.
    """
    # Same normalisation for every header: trimmed, lower-case, spaces/hyphens -> underscores.
    df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('-', '_')
    print(f"Cleaned columns: {df.columns.tolist()}")

    if target_column not in df.columns:
        print(f"❌ Target column '{target_column}' not found. Available: {df.columns.tolist()}")
        return None

    X = df.drop(columns=[target_column])
    y = df[target_column]

    if X.isnull().values.any():
        print("⚠️ Missing values in features → filling with 0")
        X = X.fillna(0)

    has_label = y.notnull()
    if not has_label.all():
        print("⚠️ Missing values in target → dropping those rows")
        X = X[has_label]
        y = y[has_label]

    return X, y


def _fit_and_evaluate(X, y, test_size, random_state):
    """Fit a linear-kernel SVC on a train split and print its accuracy on the held-out split.

    Returns the fitted model, or ``None`` after printing the error when splitting,
    fitting or scoring raised.
    """
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )
        print(f"Training: {len(X_train)} | Testing: {len(X_test)}")

        svc_model = SVC(kernel='linear', C=1.0, random_state=random_state)
        svc_model.fit(X_train, y_train)

        accuracy = accuracy_score(y_test, svc_model.predict(X_test))
        print(f"✅ Model Accuracy: {accuracy:.4f}")
    except Exception as e:
        print(f"❌ Training error: {str(e)}")
        return None

    return svc_model


def _save_assets(output_dir, artifacts):
    """Pickle every ``(filename, obj)`` pair of ``artifacts`` into ``output_dir``.

    Each object is first dumped to ``<filename>.tmp``; the temporary files are renamed
    to their final names only after *all* dumps succeeded, so a failure while pickling
    never leaves a truncated file or a partial set of new artifacts behind.
    Returns ``True`` on success, ``False`` after printing the error otherwise.
    """
    staged = []
    try:
        os.makedirs(output_dir, exist_ok=True)

        for filename, obj in artifacts:
            final_path = os.path.join(output_dir, filename)
            tmp_path = final_path + '.tmp'
            staged.append((tmp_path, final_path))
            with open(tmp_path, 'wb') as f:
                pickle.dump(obj, f)

        for tmp_path, final_path in staged:
            os.replace(tmp_path, final_path)
    except Exception as e:
        # Best-effort cleanup of whatever temporary files are still around;
        # the original error is the one worth reporting.
        for tmp_path, _ in staged:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        print(f"❌ Save error: {str(e)}")
        return False

    print(f"✅ Saved model + lists to '{output_dir}'")
    return True


def train_and_save_model(data_path=None, output_dir=None, model_filename=None,
                         target_column="diseases", test_size=0.2, random_state=42):
    """Load the dataset, train an SVC, and save the model plus symptom/disease lists.

    Called without arguments this behaves as before: it reads ``DATA_PATH`` and writes
    ``MODEL_FILENAME``, ``symptoms_list.pkl`` and ``diseases_list.pkl`` into
    ``MODEL_OUTPUT_DIR``.

    Args:
        data_path: CSV file to load. ``None`` means the module-level ``DATA_PATH``.
        output_dir: Directory for the pickled artifacts (created if missing).
            ``None`` means ``MODEL_OUTPUT_DIR``.
        model_filename: File name of the pickled model inside ``output_dir``.
            ``None`` means ``MODEL_FILENAME``.
        target_column: Name of the label column. It is normalised the same way as the
            CSV headers (trimmed, lower-cased, spaces/hyphens replaced by underscores)
            before the lookup.
        test_size: Passed to ``train_test_split``.
        random_state: Seed passed to both ``train_test_split`` and ``SVC``.

    Returns:
        ``(symptom_names, disease_names)`` on success: the feature column names in
        dataset order and the sorted unique target values. ``(None, None)`` when any
        stage failed; the reason is printed rather than raised.
    """
    # Resolve the defaults lazily so the module constants are looked up at call time.
    data_path = DATA_PATH if data_path is None else data_path
    output_dir = MODEL_OUTPUT_DIR if output_dir is None else output_dir
    model_filename = MODEL_FILENAME if model_filename is None else model_filename
    target_column = target_column.strip().lower().replace(' ', '_').replace('-', '_')

    # --- 1. Load the Dataset ---
    df = _load_dataset(data_path)
    if df is None:
        return None, None

    # --- 2. Data Cleaning ---
    prepared = _split_features_target(df, target_column)
    if prepared is None:
        return None, None
    X, y = prepared

    symptom_names = X.columns.tolist()
    # Built from the full target column; because the split below is not stratified,
    # this list can contain a label that ended up only in the test split.
    disease_names = sorted(y.unique().tolist())
    print(f"✅ Features: {len(symptom_names)} | Diseases: {len(disease_names)}")

    # --- 3. Model Training ---
    svc_model = _fit_and_evaluate(X, y, test_size, random_state)
    if svc_model is None:
        return None, None

    # --- 4. Save Assets ---
    saved = _save_assets(output_dir, [
        (model_filename, svc_model),
        ('symptoms_list.pkl', symptom_names),
        ('diseases_list.pkl', disease_names),
    ])
    if not saved:
        return None, None

    return symptom_names, disease_names

if __name__ == "__main__":
    # Run the whole pipeline; both results come back as None when any stage failed.
    symptom_list, disease_list = train_and_save_model()

    if not (symptom_list and disease_list):
        print("❌ Training failed. No files saved.")
    else:
        print("🎉 Training complete. 3 files generated (model + symptoms + diseases).")

✅ Data loaded: 246945 rows, 378 columns
Original columns: ['diseases', 'anxiety and nervousness', 'depression', 'shortness of breath', 'depressive or psychotic symptoms', 'sharp chest pain', 'dizziness', 'insomnia', 'abnormal involuntary movements', 'chest tightness', 'palpitations', 'irregular heartbeat', 'breathing fast', 'hoarse voice', 'sore throat', 'difficulty speaking', 'cough', 'nasal congestion', 'throat swelling', 'diminished hearing', 'lump in throat', 'throat feels tight', 'difficulty in swallowing', 'skin swelling', 'retention of urine', 'groin mass', 'leg pain', 'hip pain', 'suprapubic pain', 'blood in stool', 'lack of growth', 'emotional symptoms', 'elbow weakness', 'back weakness', 'pus in sputum', 'symptoms of the scrotum and testes', 'swelling of scrotum', 'pain in testicles', 'flatulence', 'pus draining from ear', 'jaundice', 'mass in scrotum', 'white discharge from eye', 'irritable infant', 'abusing alcohol', 'fainting', 'hostile behavior', 'drug abuse', 'sharp abdo