# RIPPER ( Repeated Incremental Pruning to Produce Error Reduction)
[RIPPER](https://www.geeksforgeeks.org/ripper-algorithm/)



In [1]:
from sklearn.model_selection import train_test_split
import wittgenstein as lw

In [2]:

import sys
sys.path.append("/home/alvaro/Desktop/MachineLearning/PracticalApplications/1-3/my-staff")
from utils import load_ds
from globals import TEST_SIZE, SEED

In [6]:
X, y, features_names, class_names = load_ds("univariant")
X.shape

(293, 10)

In [7]:
params =  {'alpha': 0.1, 'k': 2, 'n-discretize-bins': 10, 'prune-size': 0.1}

# Evaluate

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X,
                                                                y,
                                                                test_size=TEST_SIZE,
                                                                random_state=SEED)

print(len(X_train), len(X_test))

205 88


In [9]:
ripper_clf = lw.RIPPER(k=params["k"], alpha=params["alpha"], n_discretize_bins=params["n-discretize-bins"], prune_size=params["prune-size"], verbosity=True) # Or irep_clf = lw.IREP() to build a model using IREP
ripper_clf

<RIPPER(verbosity=True, random_state=None, dl_allowance=64, n_discretize_bins=10, prune_size=0.1, max_rule_conds=None, alpha=0.1, k=2, max_rules=None, max_total_conds=None)>

In [10]:
ripper_clf.fit(X_train, y_train)

discretizing 5 features

GREW INITIAL RULESET:
[[Hepatomegaly_N=False ^ N_Days=<602.6] V
[Hepatomegaly_N=False ^ N_Days=602.6-999.8] V
[Copper=>200.6 ^ Bilirubin=>7.16] V
[Hepatomegaly_N=False ^ Bilirubin=3.9-7.16] V
[Stage=4.0 ^ Albumin=<3.0] V
[Copper=80.8-104.6 ^ Albumin=3.0-3.23] V
[Bilirubin=2.78-3.9] V
[Hepatomegaly_N=False] V
[Copper=>200.6] V
[N_Days=<602.6] V
[N_Days=2575.6-3072.4 ^ Copper=<24.0] V
[N_Days=1945.0-2290.4] V
[Copper=80.8-104.6 ^ N_Days=2575.6-3072.4] V
[Copper=44.4-57.6 ^ Bilirubin=0.6-0.8] V
[Bilirubin=1.06-1.3 ^ N_Days=3072.4-3821.8] V
[N_Days=999.8-1349.2] V
[Prothrombin=10.7-11.0 ^ N_Days=2290.4-2575.6]]

optimization run 1 of 2

OPTIMIZED RULESET:
[[Hepatomegaly_N=False ^ N_Days=<602.6] V
[Hepatomegaly_N=False ^ N_Days=602.6-999.8] V
[Copper=>200.6 ^ Bilirubin=>7.16] V
[Hepatomegaly_N=False ^ Bilirubin=3.9-7.16] V
[Stage=4.0 ^ Albumin=<3.0] V
[Copper=80.8-104.6 ^ Albumin=3.0-3.23] V
[Bilirubin=2.78-3.9] V
[Hepatomegaly_N=False] V
[Copper=>200.6] V
[N_Days=<

In [11]:
ripper_clf.out_model()

[[Hepatomegaly_N=False ^ N_Days=<602.6] V
[Hepatomegaly_N=False ^ N_Days=602.6-999.8] V
[Copper=>200.6 ^ Bilirubin=>7.16] V
[Stage=4.0 ^ Albumin=<3.0] V
[Copper=80.8-104.6 ^ Albumin=3.0-3.23] V
[Copper=>200.6] V
[N_Days=<602.6]]


# Evaluation

In [12]:
from utils import eval_model, show_report

In [13]:
y_pred = ripper_clf.predict(X_test)

In [14]:
eval_model(y_test, y_pred)

Métricas de evaluación:
Precision: 0.8127
Recall: 0.7955
F1-score: 0.7872


In [15]:
show_report(y_test, y_pred)

{'0': {'precision': 0.7580645161290323, 'recall': 0.94, 'f1-score': 0.8392857142857143, 'support': 50.0}, '1': {'precision': 0.8846153846153846, 'recall': 0.6052631578947368, 'f1-score': 0.71875, 'support': 38.0}, 'accuracy': 0.7954545454545454, 'macro avg': {'precision': 0.8213399503722084, 'recall': 0.7726315789473683, 'f1-score': 0.7790178571428572, 'support': 88.0}, 'weighted avg': {'precision': 0.8127114820663208, 'recall': 0.7954545454545454, 'f1-score': 0.7872362012987014, 'support': 88.0}}
              precision    recall  f1-score   support

           0       0.76      0.94      0.84        50
           1       0.88      0.61      0.72        38

    accuracy                           0.80        88
   macro avg       0.82      0.77      0.78        88
weighted avg       0.81      0.80      0.79        88

