## Example case: breast-cancer classification

### Imports

In [None]:
import sys
sys.path.append("../itershap")

import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

from itershap import IterSHAP

### Load data and apply IterSHAP feature selection

In [None]:
# Fetch the breast-cancer feature matrix and labels bundled with scikit-learn
X, y = load_breast_cancer(return_X_y=True)

# Report the dimensions of the data before any feature selection
print(X.shape)

# Keep the complete feature matrix around as a DataFrame; the
# "without feature selection" run further down trains on it
X_without_fs = pd.DataFrame(data=X)

# Build an IterSHAP selector around RandomForestClassifier and fit it on all of the data
itershap_fs = IterSHAP(RandomForestClassifier)
itershap_fs.fit(X, y)

# Rebind X to IterSHAP's transformed output (selected features only) and report its dimensions
X = itershap_fs.transform()
print(X.shape)

### Perform classification

#### Without feature selection

In [None]:
# Baseline: train and evaluate a random forest on the full, unselected feature set.
# A fixed random_state makes the 75/25 split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_without_fs, y, test_size=0.25, random_state=42
)

# Seed the forest as well so the printed accuracy is reproducible.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# accuracy_score's signature is (y_true, y_pred): ground truth goes first.
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

#### With feature selection

In [None]:
# Train and evaluate a random forest on X, which at this point holds the
# output of itershap_fs.transform() (the IterSHAP-selected features).
# A fixed random_state makes the 75/25 split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Seed the forest as well so the printed accuracy is reproducible.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# accuracy_score's signature is (y_true, y_pred): ground truth goes first.
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)
