# RIPPER ( Repeated Incremental Pruning to Produce Error Reduction)
[RIPPER](https://www.geeksforgeeks.org/ripper-algorithm/)



In [1]:
from sklearn.model_selection import train_test_split
import wittgenstein as lw

In [2]:
import sys


In [3]:
X, y, features_names, class_names = load_ds("all_encoded_cirrhosis")
X.shape

(293, 25)

# Hiperparameter selection

In [4]:
from sklearn.model_selection import GridSearchCV

In [5]:
# Definir los posibles valores de los parámetros
param_grid = {
    'prune-size': [0.1, 0.3, 0.5],
    'k': [2, 3, 4],
    'alpha': [0.1, 1.0, 2.0],
    'n-discretize-bins': [ 10, 15, 20]
}

grid_search = GridSearchCV(
    estimator= lw.RIPPER(random_state=SEED),
    param_grid=param_grid,
    cv=5,
    n_jobs=-1
)

In [6]:
grid_search.fit(X, y)

# Mostrar los mejores parámetros y el mejor rendimiento
params = grid_search.best_params_
print("Best params:", params)
print("Best score:", grid_search.best_score_)

Best params: {'alpha': 0.1, 'k': 2, 'n-discretize-bins': 10, 'prune-size': 0.1}
Best score: 0.6150789012273525


# Evaluate

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X,
                                                                y,
                                                                test_size=TEST_SIZE,
                                                                random_state=SEED)

print(len(X_train), len(X_test))

205 88


In [14]:
ripper_clf = lw.RIPPER(k=params["k"], alpha=params["alpha"], n_discretize_bins=params["n-discretize-bins"], prune_size=params["prune-size"], verbosity=True) # Or irep_clf = lw.IREP() to build a model using IREP
ripper_clf

<RIPPER(random_state=None, alpha=0.1, dl_allowance=64, verbosity=True, prune_size=0.1, max_total_conds=None, k=2, max_rule_conds=None, max_rules=None, n_discretize_bins=10)>

In [15]:
ripper_clf.fit(X_train, y_train)

discretizing 11 features

GREW INITIAL RULESET:
[[Hepatomegaly_N=False ^ Bilirubin=>7.16] V
[Hepatomegaly_N=False ^ N_Days=602.6-999.8] V
[N_Days=<602.6] V
[Hepatomegaly_N=False ^ Cholesterol=369.51-404.8] V
[SGOT=>198.4 ^ Copper=104.6-141.8] V
[Alk_Phos=>3872.0] V
[Prothrombin=11.5-12.0] V
[Cholesterol=404.8-510.0 ^ Age=35.41-40.71] V
[Sex_F=False ^ Bilirubin=1.3-1.9] V
[Copper=<24.0 ^ Tryglicerides=<68.0] V
[Alk_Phos=2109.4-3872.0] V
[Tryglicerides=82.8-93.4] V
[Cholesterol=295.2-325.0] V
[Platelets=<133.6 ^ N_Days=2575.6-3072.4]]

optimization run 1 of 2

OPTIMIZED RULESET:
[[Hepatomegaly_N=False ^ Bilirubin=>7.16] V
[Hepatomegaly_N=False ^ N_Days=602.6-999.8] V
[N_Days=<602.6] V
[Hepatomegaly_N=False ^ Cholesterol=369.51-404.8] V
[SGOT=>198.4 ^ Copper=104.6-141.8] V
[Alk_Phos=>3872.0] V
[Prothrombin=11.5-12.0] V
[Cholesterol=404.8-510.0 ^ Age=35.41-40.71] V
[Sex_F=False ^ Bilirubin=1.3-1.9] V
[Copper=<24.0 ^ Tryglicerides=<68.0] V
[Alk_Phos=2109.4-3872.0] V
[Tryglicerides=82.8-93.4

In [16]:
ripper_clf.out_model()

[[Hepatomegaly_N=False ^ Bilirubin=>7.16] V
[Hepatomegaly_N=False ^ N_Days=602.6-999.8] V
[N_Days=<602.6] V
[SGOT=>198.4 ^ Copper=104.6-141.8] V
[Prothrombin=11.5-12.0]]


# Evaluation

In [17]:
from utils import eval_model, show_report

In [18]:
y_pred = ripper_clf.predict(X_test)

In [19]:
eval_model(y_test, y_pred)

Métricas de evaluación:
Precision: 0.7775
Recall: 0.7500
F1-score: 0.7339


In [20]:
show_report(y_test, y_pred)

{'0': {'precision': 0.7121212121212122, 'recall': 0.94, 'f1-score': 0.810344827586207, 'support': 50.0}, '1': {'precision': 0.8636363636363636, 'recall': 0.5, 'f1-score': 0.6333333333333333, 'support': 38.0}, 'accuracy': 0.75, 'macro avg': {'precision': 0.7878787878787878, 'recall': 0.72, 'f1-score': 0.7218390804597701, 'support': 88.0}, 'weighted avg': {'precision': 0.7775482093663914, 'recall': 0.75, 'f1-score': 0.7339080459770115, 'support': 88.0}}
              precision    recall  f1-score   support

           0       0.71      0.94      0.81        50
           1       0.86      0.50      0.63        38

    accuracy                           0.75        88
   macro avg       0.79      0.72      0.72        88
weighted avg       0.78      0.75      0.73        88

