In [None]:
import matplotlib.pylab as plt 
import numpy as np
import pandas as pd
import seaborn as sns 
from scipy.stats import uniform
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.feature_selection import RFECV
from sklearn.feature_selection import SelectFromModel, SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score  # << very important
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split 
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier


In [None]:
from ucimlrepo import fetch_ucirepo

# Download UCI dataset id 848 through ucimlrepo.
secondary_mushroom = fetch_ucirepo(id=848)

# Split the payload into the feature table (x) and the target table (y).
mushroom_data = secondary_mushroom.data
x, y = mushroom_data.features, mushroom_data.targets

# Print the dataset metadata first, then the per-variable information.
for dataset_info in (secondary_mushroom.metadata, secondary_mushroom.variables):
    print(dataset_info)


### Mushroom Dataset Description

**Target Variable (Binary Class):**  
- `edible = e`  
- `poisonous = p` (also includes mushrooms of unknown edibility)  

---

### Features (20 variables: nominal = n, metrical = m)

1. **cap-diameter (m):** float number in cm  
2. **cap-shape (n):**  
   - bell = b  
   - conical = c  
   - convex = x  
   - flat = f  
   - sunken = s  
   - spherical = p  
   - others = o  
3. **cap-surface (n):**  
   - fibrous = i  
   - grooves = g  
   - scaly = y  
   - smooth = s  
   - shiny = h  
   - leathery = l  
   - silky = k  
   - sticky = t  
   - wrinkled = w  
   - fleshy = e  
4. **cap-color (n):**  
   - brown = n  
   - buff = b  
   - gray = g  
   - green = r  
   - pink = p  
   - purple = u  
   - red = e  
   - white = w  
   - yellow = y  
   - blue = l  
   - orange = o  
   - black = k  
5. **does-bruise-or-bleed (n):**  
   - bruises-or-bleeding = t  
   - no = f  
6. **gill-attachment (n):**  
   - adnate = a  
   - adnexed = x  
   - decurrent = d  
   - free = e  
   - sinuate = s  
   - pores = p  
   - none = f  
   - unknown = ?  
7. **gill-spacing (n):**  
   - close = c  
   - distant = d  
   - none = f  
8. **gill-color (n):** see `cap-color` + none = f  
9. **stem-height (m):** float number in cm  
10. **stem-width (m):** float number in mm  
11. **stem-root (n):**  
    - bulbous = b  
    - swollen = s  
    - club = c  
    - cup = u  
    - equal = e  
    - rhizomorphs = z  
    - rooted = r  
12. **stem-surface (n):** see `cap-surface` + none = f  
13. **stem-color (n):** see `cap-color` + none = f  
14. **veil-type (n):**  
    - partial = p  
    - universal = u  
15. **veil-color (n):** see `cap-color` + none = f  
16. **has-ring (n):**  
    - ring = t  
    - none = f  
17. **ring-type (n):**  
    - cobwebby = c  
    - evanescent = e  
    - flaring = r  
    - grooved = g  
    - large = l  
    - pendant = p  
    - sheathing = s  
    - zone = z  
    - scaly = y  
    - movable = m  
    - none = f  
    - unknown = ?  
18. **spore-print-color (n):** see `cap-color`  
19. **habitat (n):**  
    - grasses = g  
    - leaves = l  
    - meadows = m  
    - paths = p  
    - heaths = h  
    - urban = u  
    - waste = w  
    - woods = d  
20. **season (n):**  
    - spring = s  
    - summer = u  
    - autumn = a  
    - winter = w  

---

### Class Labels
- **edible = e**  
- **poisonous = p**  


In [None]:
# Show the feature table loaded from the dataset as this cell's output.
x

In [None]:
# Distinct raw category codes that occur in the habitat feature.
print(x["habitat"].unique())


# Show the target table loaded from the dataset.
y

## Mushroom Classification Project

### Problem Description
The goal of this project is to classify mushrooms as **edible (e)** or **poisonous (p)** based on 20 descriptive features such as cap shape, cap color, stem characteristics, habitat, and season.  
This is a **binary classification problem** where the target variable is the mushroom class (`edible` or `poisonous`).  

---

### Project Workflow

#### 1. Exploratory Data Analysis (EDA)
- Inspect the dataset structure (rows, columns, data types).  
- Check for missing values and duplicates.  
- Visualize distributions of categorical features (bar plots) and numeric features (histograms, boxplots).  
- Explore correlations between features and the target class.  
- Look for class imbalance in the target variable (edible vs poisonous).  

#### 2. Data Cleaning & Preprocessing
- Handle missing values or unknown entries if present.  
- Encode categorical features (e.g., **One-Hot Encoding** or **Label Encoding**).  
- Scale numerical features (e.g., **StandardScaler** or **MinMaxScaler**).  
- Stratify the dataset when splitting into **training and test sets** to maintain class balance.  
- If class imbalance is significant, apply oversampling techniques (e.g., **SMOTE**).  

#### 3. Feature Selection
To reduce dimensionality and improve generalization, apply feature selection techniques such as:  
- **Filter Methods:** Use statistical tests (e.g., Chi-square, ANOVA F-test, mutual information) to rank features.  
- **Wrapper Methods:** Apply Recursive Feature Elimination (**RFE**) with models like Logistic Regression or Decision Trees.  
- **Embedded Methods:** Leverage feature importance from models (e.g., Decision Tree, Random Forest, Lasso Regression).  
- Compare model performance before and after feature selection to evaluate impact.  

#### 4. Model Training & Evaluation
Train and evaluate the following classification models:  
- **Logistic Regression**  
- **Decision Tree**  
- **Random Forest**  
- **Support Vector Machine (SVM)**  
- **K-Nearest Neighbors (KNN)**  
- **Naive Bayes**  

For each model:  
- Train on the **training set**.  
- Evaluate on the **test set** using:  
  - **Classification Report** (Precision, Recall, F1-score, Accuracy)  
  - **Confusion Matrix**  
  - **Cross-validation (optional)** for stability check  

#### 5. Hyperparameter Tuning
To improve model performance, apply hyperparameter optimization techniques:  
- **Grid Search:** Exhaustively search all combinations of parameters (good for small parameter spaces).  
- **Random Search:** Randomly sample parameter combinations (more efficient for large parameter spaces).  
- **Bayesian Optimization:** Iteratively choose hyperparameters based on past performance (efficient for complex models like Random Forest, SVM, or KNN).  

Compare tuned models with baseline models to measure improvement.  

#### 6. Feature Importance
- Extract **feature importance** from Decision Tree and Random Forest models.  
- Rank features to identify the most influential ones for classification.  
- Use feature importance as a basis for feature selection and model simplification.  

#### 7. Model Comparison
- Compare performance across all models (baseline, tuned, and with feature selection).  
- Use metrics such as accuracy, precision, recall, F1-score, and ROC-AUC to decide the best-performing model.
- Create plot of ROC-AUC curve of each model with different color for comparison  
- Summarize results in a **comparison table** or **bar plot**.  

---

#### 8. Save best models as pickle files

---

### Final Deliverables
- Preprocessed dataset ready for classification.  
- Performance evaluation of six classification models.  
- Insights on class imbalance and handling techniques.  
- Feature selection results (filter, wrapper, and embedded methods).  
- Hyperparameter tuning results from Grid Search, Random Search, and Bayesian Optimization.  
- Feature importance ranking for better model interpretability.  
- Final comparison and recommendation of the best-performing model for mushroom classification.  
- pickle files of the saved models


1- Exploratory Data Analysis (EDA)

In [None]:
# Put features and target side by side in one frame (column-wise concatenation).
df = pd.concat([x, y], axis="columns")

In [None]:
# Preview the combined features + target frame.
df

In [None]:
# Summary statistics of df as computed by DataFrame.describe().
df.describe()

In [None]:
# Structural overview of df (DataFrame.info()).
df.info()

In [None]:
# Percentage of missing values per column, drawn as a bar chart in ascending order.
missper = df.isna().sum().div(len(df)).mul(100)
fig, ax = plt.subplots(figsize=(10, 10))
missper.sort_values().plot(kind="bar", ax=ax)



# Check the share of null values per column and remove every column with more than 30% NaN values

In [None]:
# Columns whose share of missing values (in percent) exceeds this threshold are removed.
MISSING_THRESHOLD_PCT = 30

column = missper[missper > MISSING_THRESHOLD_PCT].index

# errors="ignore" keeps this cell re-runnable: `missper` was computed before the drop,
# so on a second execution it still lists columns that are already gone from df.
df = df.drop(columns=column, errors="ignore")
df.info()

In [None]:
sns.histplot(df["class"],bins=50,kde=True)

# Impute missing categorical values with each column's most frequent value.
# Missingness in this notebook is measured with isnull(), i.e. the gaps are real
# NaN values; replacing the literal string "NAN" never touched them, so fillna()
# does the actual imputation. The literal replacement is kept for compatibility.
for impute_col in ("ring-type", "gill-attachment", "cap-surface"):
    most_freq = df[impute_col].mode()[0]  # mode() ignores NaN by default
    df[impute_col] = df[impute_col].replace("NAN", most_freq).fillna(most_freq)

In [None]:
# Re-compute the per-column missing percentage and plot it in ascending order.
dataAfter = df.isna().sum().div(len(df)).mul(100)
dataAfter.sort_values().plot.bar()

In [None]:
# Distribution of the target column.
# NOTE(review): `class` is a two-valued label, so bins=50 and kde=True add little here —
# consider sns.countplot; confirm intent.
sns.histplot(df["class"],bins=50,kde=True)

In [None]:
# Scatter of cap diameter against the class label, followed by the first ten rows of df.
sns.scatterplot(data=df, x="class", y="cap-diameter")
df.head(n=10)

In [None]:
# Categorical columns (including the target `class`) to convert to integer codes.
colm=["cap-shape","gill-attachment","gill-color","does-bruise-or-bleed","season","habitat","ring-type","has-ring","stem-color","cap-color","class","cap-surface"]
for z in colm:
    # Skip columns that are already numeric. Without this guard, re-running the cell
    # would cast the integer codes to strings ("0", "1", "10", "11", "2", ...) and
    # LabelEncoder's sorted class order would scramble every column with more than
    # ten categories.
    if pd.api.types.is_numeric_dtype(df[z]):
        continue
    encoder=LabelEncoder()
    # astype(str) guarantees a uniform string dtype before fitting the encoder.
    df[z]= encoder.fit_transform(df[z].astype(str))
    

In [None]:
# Inspect the frame after label encoding.
df

In [None]:
# Distinct values now stored in two of the label-encoded columns.
print(df["does-bruise-or-bleed"].unique())
print(df["habitat"].unique())

In [None]:
# Show the current state of df once more.
df

In [None]:
# Vertical box plot of stem-height (missing values removed) with the mean marked.
stem_heights = df["stem-height"].dropna()

fig, ax = plt.subplots(figsize=(5, 6))
ax.boxplot(stem_heights, vert=True, showmeans=True)
ax.set_title("Box Plot — stem-height")
ax.set_ylabel("stem-height")
ax.grid(True, linestyle="--", alpha=0.4)
plt.show()

In [None]:
# Pairwise correlation matrix of all columns in df, rendered as an annotated heatmap.
corr = df.corr()
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)

In [None]:
# selected_features = X_train.columns[rfe.support_]
# print("Selected Features:", selected_features.tolist())

# X_train_fs = X_train[selected_features]
# X_test_fs = X_test[selected_features]

# model.fit(X_train_fs, y_train)
# y_pred = model.predict(X_test_fs)

# accuracy = accuracy_score(y_test, y_pred)
# print(f"Accuracy after Feature Selection: {accuracy:.4f}")

In [None]:
# selected_features = X_train.columns[rfecv.support_]
# print("Selected Features:", selected_features.tolist())
# X_train_selected = X_train[selected_features]
# X_test_selected  = X_test[selected_features]
# grid = GridSearchCV(
#     LogisticRegression(max_iter=2000),
#     param_grid=param_grid,
#     cv=5,
#     scoring='accuracy',
#     n_jobs=-1
# )
# grid.fit(X_train_selected, y_train)

# print("best_params:", grid.best_params_)
# print("acc Cross-Validation:", grid.best_score_)
# print("test set:", grid.score(X_test_selected, y_test))

In [None]:
# from sklearn.feature_selection import RFECV, SelectFromModel

# print("\n🌳 Decision Tree Feature Selection...")

# dt = DecisionTreeClassifier(random_state=42)

# # RFECV مع Decision Tree
# rfecv_dt = RFECV(
#     estimator=dt,
#     step=1,
#     cv=5,
#     scoring='accuracy',
#     n_jobs=-1,
#     min_features_to_select=1
# )
# rfecv_dt.fit(X_train, y_train)

# selected_features_dt = X_train.columns[rfecv_dt.support_]
# print("✅ DT Selected Features:", selected_features_dt.tolist())

# X_train_dt = X_train[selected_features_dt]
# X_test_dt = X_test[selected_features_dt]

# # GridSearchCV على Decision Tree
# param_grid_dt = {
#     'criterion': ['gini', 'entropy'],
#     'max_depth': [None, 5, 10, 20, 50],
#     'min_samples_split': [2, 5, 10],
#     'min_samples_leaf': [1, 2, 4]
# }

# grid_dt = GridSearchCV(
#     DecisionTreeClassifier(random_state=42),
#     param_grid=param_grid_dt,
#     cv=5,
#     scoring='accuracy',
#     n_jobs=-1,
#     verbose=1
# )

# grid_dt.fit(X_train_dt, y_train)

# print("\n🏆 Decision Tree Results:")
# print("Best Params:", grid_dt.best_params_)
# print("CV Accuracy:", f"{grid_dt.best_score_:.4f}")
# print("Test Accuracy:", f"{grid_dt.score(X_test_dt, y_test):.4f}")

# # -----------------------------
# # Feature Selection + Tuning for Random Forest
# # -----------------------------
# print("\n🌲 Random Forest Feature Selection...")

# rf = RandomForestClassifier(random_state=42)

# # SelectFromModel مع Random Forest
# sfm = SelectFromModel(rf, threshold="median")
# sfm.fit(X_train, y_train)

# selected_features_rf = X_train.columns[sfm.get_support()]
# print(" RF Selected Features:", selected_features_rf.tolist())

# X_train_rf = X_train[selected_features_rf]
# X_test_rf = X_test[selected_features_rf]

# param_grid_rf = {
#     'n_estimators': [100, 200, 500],
#     'criterion': ['gini', 'entropy'],
#     'max_depth': [None, 10, 20, 50],
#     'min_samples_split': [2, 5, 10],
#     'min_samples_leaf': [1, 2, 4]
# }
# grid_rf = GridSearchCV(
#     RandomForestClassifier(random_state=42, n_jobs=-1),
#     param_grid=param_grid_rf,
#     cv=5,
#     scoring='accuracy',
#     n_jobs=-1,
#     verbose=1
# )

# grid_rf.fit(X_train_rf, y_train)

# print("\n Random Forest Results:")
# print("Best Params:", grid_rf.best_params_)
# print("CV Accuracy:", f"{grid_rf.best_score_:.4f}")
# print("Test Accuracy:", f"{grid_rf.score(X_test_rf, y_test):.4f}")

# importances = grid_rf.best_estimator_.feature_importances_
# feature_importance = pd.DataFrame({
#     'feature': selected_features_rf,
#     'importance': importances
# }).sort_values('importance', ascending=False)

# print("\n Top 10 Features from Random Forest:")
# print(feature_importance.head(10))
# #Best Accuration in this with select Feature

In [None]:
# print("\n🌳 Decision Tree Feature Selection...")

# dt = DecisionTreeClassifier(random_state=42)

# rfecv_dt = RFECV(
#     estimator=dt,
#     step=1,
#     cv=5,
#     scoring='accuracy',
#     n_jobs=-1,
#     min_features_to_select=1
# )
# rfecv_dt.fit(X_train, y_train)

# selected_features_dt = X_train.columns[rfecv_dt.support_]
# X_train_dt, X_test_dt = X_train[selected_features_dt], X_test[selected_features_dt]

# param_grid_dt = {
#     'criterion': ['gini', 'entropy'],
#     'max_depth': [None, 5, 10, 20, 50],
#     'min_samples_split': [2, 5, 10],
#     'min_samples_leaf': [1, 2, 4]
# }

# grid_dt = GridSearchCV(
#     DecisionTreeClassifier(random_state=42),
#     param_grid=param_grid_dt,
#     cv=5,
#     scoring='accuracy',
#     n_jobs=-1,
#     verbose=1
# )
# grid_dt.fit(X_train_dt, y_train)

# print("\n🏆 Decision Tree Results:")
# print("Best Params:", grid_dt.best_params_)
# print("CV Accuracy:", f"{grid_dt.best_score_:.4f}")
# print("Test Accuracy:", f"{grid_dt.score(X_test_dt, y_test):.4f}")

In [None]:
# print("\n🤝 KNN Feature Selection...")

# # هنا هنستخدم SelectKBest (ANOVA F-test) بدل RFECV لأنه أسرع
# selector_knn = SelectKBest(score_func=f_classif, k=10)
# X_train_knn = selector_knn.fit_transform(X_train, y_train)
# X_test_knn = selector_knn.transform(X_test)
# selected_features_knn = X_train.columns[selector_knn.get_support()]
# print("✅ KNN Selected Features:", selected_features_knn.tolist())
# param_grid_knn = {
#     'n_neighbors': [3, 5, 7, 9, 11],
#     'weights': ['uniform', 'distance'],
#     'metric': ['euclidean', 'manhattan', 'minkowski']
# }
# grid_knn = GridSearchCV(
#     KNeighborsClassifier(),
#     param_grid=param_grid_knn,
#     cv=5,
#     scoring='accuracy',
#     n_jobs=-1,
#     verbose=1
# )
# grid_knn.fit(X_train_knn, y_train)
# print("\n🏆 KNN Results:")
# print("Best Params:", grid_knn.best_params_)
# print("CV Accuracy:", f"{grid_knn.best_score_:.4f}")
# print("Test Accuracy:", f"{grid_knn.score(X_test_knn, y_test):.4f}")

In [None]:
##print("\n📊 Naive Bayes Feature Selection...")

#selector_nb = SelectKBest(score_func=f_classif, k=15)
#X_train_nb = selector_nb.fit_transform(X_train, y_train)
#X_test_nb = selector_nb.transform(X_test)
#selected_features_nb = X_train.columns[selector_nb.get_support()]
#print("✅ NB Selected Features:", selected_features_nb.tolist())

#param_grid_nb = {
   # '#var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5]
#}
#grid_nb = GridSearchCV(
   # GaussianNB(),
    #param_grid=param_grid_nb,
    #cv=5,
    #scoring='accuracy',
   # n_jobs=-1,
   # verbose=1
#)
#grid_nb.fit(X_train_nb, y_train)
#print("\n Naive Bayes Results:")
#print("Best Params:", grid_nb.best_params_)
#print("CV Accuracy:", f"{grid_nb.best_score_:.4f}")
#print("Test Accuracy:", f"{grid_nb.score(X_test_nb, y_test):.4f}")

In [None]:

# ================================
# Feature matrix / target vector and hold-out split
# ================================
# Filled in below, one entry per model name.
results = {}

y = df["class"]
X = df.drop(columns="class")

# 80/20 split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
# ================================
# Logistic Regression: RFECV feature selection, then grid search
# ================================
print("\n Logistic Regression...")

# Recursive feature elimination with 5-fold CV, scored by accuracy.
rfecv_lr = RFECV(
    estimator=LogisticRegression(solver="lbfgs", max_iter=2000),
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
)
rfecv_lr.fit(X_train, y_train)

# Restrict both partitions to the columns RFECV kept.
selected_features_lr = X_train.columns[rfecv_lr.support_]
X_train_lr, X_test_lr = X_train[selected_features_lr], X_test[selected_features_lr]

param_grid_lr = dict(
    C=[0.1, 1, 10],
    solver=['liblinear', 'lbfgs'],
    penalty=['l2'],
)

grid_lr = GridSearchCV(
    estimator=LogisticRegression(max_iter=2000, random_state=42),
    param_grid=param_grid_lr,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_lr.fit(X_train_lr, y_train)

# Stored as (test accuracy, selected feature names, best fitted estimator).
results["Logistic Regression"] = (
    grid_lr.score(X_test_lr, y_test),
    selected_features_lr.tolist(),
    grid_lr.best_estimator_,
)

# ================================
# 2. Decision Tree: RFECV feature selection, then grid search
# ================================
print("\n Decision Tree...")

# Recursive feature elimination with 5-fold CV, scored by accuracy.
rfecv_dt = RFECV(
    estimator=DecisionTreeClassifier(random_state=42),
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
)
rfecv_dt.fit(X_train, y_train)

# Restrict both partitions to the columns RFECV kept.
selected_features_dt = X_train.columns[rfecv_dt.support_]
X_train_dt, X_test_dt = X_train[selected_features_dt], X_test[selected_features_dt]

param_grid_dt = dict(
    criterion=['gini', 'entropy'],
    max_depth=[None, 5, 10, 20],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

grid_dt = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid_dt,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_dt.fit(X_train_dt, y_train)

# Stored as (test accuracy, selected feature names, best fitted estimator).
results["Decision Tree"] = (
    grid_dt.score(X_test_dt, y_test),
    selected_features_dt.tolist(),
    grid_dt.best_estimator_,
)

# ================================
# 3. Random Forest: SelectFromModel feature selection, then grid search
# ================================
print("\n Random Forest...")

# Select features from a fitted forest using the "median" threshold.
sfm_rf = SelectFromModel(
    estimator=RandomForestClassifier(random_state=42),
    threshold="median",
)
sfm_rf.fit(X_train, y_train)

# Restrict both partitions to the columns the selector kept.
selected_features_rf = X_train.columns[sfm_rf.get_support()]
X_train_rf, X_test_rf = X_train[selected_features_rf], X_test[selected_features_rf]

param_grid_rf = dict(
    n_estimators=[100, 200],
    criterion=['gini', 'entropy'],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
)

grid_rf = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42, n_jobs=-1),
    param_grid=param_grid_rf,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_rf.fit(X_train_rf, y_train)

# Stored as (test accuracy, selected feature names, best fitted estimator).
results["Random Forest"] = (
    grid_rf.score(X_test_rf, y_test),
    selected_features_rf.tolist(),
    grid_rf.best_estimator_,
)

# ================================
# KNN: SelectKBest (f_classif, k=10) feature selection, then grid search
# ================================
print("\n KNN...")

# Fit the univariate selector on the training partition only, then project both partitions.
selector_knn = SelectKBest(f_classif, k=10).fit(X_train, y_train)
X_train_knn = selector_knn.transform(X_train)
X_test_knn = selector_knn.transform(X_test)

selected_features_knn = X_train.columns[selector_knn.get_support()]

param_grid_knn = dict(
    n_neighbors=[3, 5, 7, 11],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

grid_knn = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid_knn,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_knn.fit(X_train_knn, y_train)

# Stored as (test accuracy, selected feature names, best fitted estimator).
results["KNN"] = (
    grid_knn.score(X_test_knn, y_test),
    selected_features_knn.tolist(),
    grid_knn.best_estimator_,
)

# ================================
# 5. Naive Bayes: SelectKBest (f_classif, k=10) feature selection, then grid search
# ================================
print("\n Naive Bayes...")

# Fit the univariate selector on the training partition only, then project both partitions.
selector_nb = SelectKBest(f_classif, k=10).fit(X_train, y_train)
X_train_nb = selector_nb.transform(X_train)
X_test_nb = selector_nb.transform(X_test)

selected_features_nb = X_train.columns[selector_nb.get_support()]

# Ten log-spaced smoothing values from 1e-9 to 1.
param_grid_nb = dict(var_smoothing=np.logspace(-9, 0, num=10))

grid_nb = GridSearchCV(
    estimator=GaussianNB(),
    param_grid=param_grid_nb,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_nb.fit(X_train_nb, y_train)

# Stored as (test accuracy, selected feature names, best fitted estimator).
results["Naive Bayes"] = (
    grid_nb.score(X_test_nb, y_test),
    selected_features_nb.tolist(),
    grid_nb.best_estimator_,
)

# ================================
# Model comparison: accuracy, selected features and confusion matrix per model
# ================================
print("\n==============================")
print(" مقارنة الموديلات")
print("==============================")

# Each estimator was fitted on its own feature subset, so it must be evaluated on
# the test matrix built from that same subset.
test_sets = {
    "Logistic Regression": X_test_lr,
    "Decision Tree": X_test_dt,
    "Random Forest": X_test_rf,
    "KNN": X_test_knn,
    "Naive Bayes": X_test_nb,
}

for model_name, (acc, features, est) in results.items():
    print(f"\n🔹 {model_name}")
    print(f" Accuracy: {acc:.4f}")
    print(f" Selected Features: {features}")

    # Prediction. An unknown model name is reported and skipped instead of silently
    # reusing the previous model's predictions for its confusion matrix.
    if model_name not in test_sets:
        print(f" No test matrix registered for {model_name}; skipping confusion matrix.")
        continue
    y_pred = est.predict(test_sets[model_name])

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False)
    plt.title(f"Confusion Matrix - {model_name}")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()


In [None]:
# ================================
# 📌 Support Vector Machine (SVM) + RandomizedSearchCV
# ================================
print("\n📌 Support Vector Machine (SVM)...")

# Feature selection: RFECV with 5-fold CV, scored by accuracy.
rfecv_svm = RFECV(
    estimator=SVC(kernel="linear"),  # linear kernel, chosen for RFECV
    cv=5,
    scoring="accuracy",
    n_jobs=-1
)
rfecv_svm.fit(X_train, y_train)

selected_features_svm = X_train.columns[rfecv_svm.support_]
print("✅ SVM Selected Features:", selected_features_svm.tolist())

X_train_svm = X_train[selected_features_svm]
X_test_svm = X_test[selected_features_svm]

# RandomizedSearch: C is sampled from uniform(loc=0.1, scale=100), i.e. [0.1, 100.1].
param_dist_svm = {
    'C': uniform(0.1, 100),
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto']
}

rand_svm = RandomizedSearchCV(
    SVC(probability=True, random_state=42),
    param_distributions=param_dist_svm,
    n_iter=20,          # number of random trials
    cv=5,
    scoring="accuracy",
    n_jobs=-1,
    random_state=42,
    verbose=1
)

rand_svm.fit(X_train_svm, y_train)

# Predict the test set once and reuse the predictions for the printed accuracy,
# the results entry and the confusion matrix (the search is scored by accuracy,
# so accuracy_score on these predictions equals rand_svm.score).
y_pred_svm = rand_svm.predict(X_test_svm)
svm_test_accuracy = accuracy_score(y_test, y_pred_svm)

print("\n🏆 SVM Results (Randomized Search):")
print("Best Params:", rand_svm.best_params_)
print("CV Accuracy:", f"{rand_svm.best_score_:.4f}")
print("Test Accuracy:", f"{svm_test_accuracy:.4f}")

# Stored as (test accuracy, selected feature names, best fitted estimator).
results["SVM"] = (
    svm_test_accuracy,
    selected_features_svm.tolist(),
    rand_svm.best_estimator_
)

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred_svm)
plt.figure(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False)
plt.title("Confusion Matrix - SVM (RandomizedSearchCV)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()
