# RIPPER ( Repeated Incremental Pruning to Produce Error Reduction)
[RIPPER](https://www.geeksforgeeks.org/ripper-algorithm/)



In [1]:
from sklearn.model_selection import train_test_split
import wittgenstein as lw

In [2]:

import sys
sys.path.append("/home/alvaro/Desktop/MachineLearning/PracticalApplications/1-3/my-staff")
from utils import load_ds
from globals import TEST_SIZE, SEED

In [3]:
X, y, features_names, class_names = load_ds("wrapper")
X.shape

(293, 10)

In [4]:
params =  {'alpha': 0.1, 'k': 2, 'n-discretize-bins': 10, 'prune-size': 0.1}

# Evaluate

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X,
                                                                y,
                                                                test_size=TEST_SIZE,
                                                                random_state=SEED)

print(len(X_train), len(X_test))

205 88


In [6]:
ripper_clf = lw.RIPPER(k=params["k"], alpha=params["alpha"], n_discretize_bins=params["n-discretize-bins"], prune_size=params["prune-size"], verbosity=True) # Or irep_clf = lw.IREP() to build a model using IREP
ripper_clf

<RIPPER(random_state=None, prune_size=0.1, alpha=0.1, verbosity=True, max_total_conds=None, max_rules=None, n_discretize_bins=10, dl_allowance=64, max_rule_conds=None, k=2)>

In [7]:
ripper_clf.fit(X_train, y_train)

discretizing 5 features

GREW INITIAL RULESET:
[[Hepatomegaly_Y=True ^ Bilirubin=>7.16] V
[Stage=4.0 ^ N_Days=602.6-999.8] V
[N_Days=<602.6] V
[SGOT=>198.4 ^ Copper=104.6-141.8] V
[Hepatomegaly_Y=True ^ Copper=80.8-104.6] V
[Hepatomegaly_Y=True ^ Bilirubin=3.9-7.16] V
[Copper=80.8-104.6] V
[Stage=4.0 ^ N_Days=3072.4-3821.8] V
[Hepatomegaly_Y=True ^ SGOT=128.65-140.84] V
[Copper=<24.0] V
[Copper=>200.6] V
[Hepatomegaly_Y=True ^ Prothrombin=10.2-10.6 ^ Spiders_Y=False] V
[Prothrombin=10.7-11.0 ^ Copper=69.0-80.8] V
[N_Days=602.6-999.8] V
[Copper=44.4-57.6 ^ Bilirubin=0.6-0.8] V
[Prothrombin=10.7-11.0 ^ Stage=3.0] V
[Bilirubin=0.8-1.06 ^ Prothrombin=9.74-9.9]]

optimization run 1 of 2

OPTIMIZED RULESET:
[[Hepatomegaly_Y=True ^ Bilirubin=>7.16] V
[Stage=4.0 ^ N_Days=602.6-999.8] V
[N_Days=<602.6] V
[SGOT=>198.4 ^ Copper=104.6-141.8] V
[Hepatomegaly_Y=True ^ Copper=80.8-104.6] V
[Hepatomegaly_Y=True ^ Bilirubin=3.9-7.16] V
[Copper=80.8-104.6] V
[Stage=4.0 ^ N_Days=3072.4-3821.8] V
[Hepatom

In [8]:
ripper_clf.out_model()

[[Hepatomegaly_Y=True ^ Bilirubin=>7.16] V
[Stage=4.0 ^ N_Days=602.6-999.8] V
[N_Days=<602.6] V
[SGOT=>198.4 ^ Copper=104.6-141.8] V
[Copper=>200.6] V
[Copper=44.4-57.6 ^ Bilirubin=0.6-0.8]]


# Evaluation

In [9]:
from utils import eval_model, show_report

In [10]:
y_pred = ripper_clf.predict(X_test)

In [11]:
eval_model(y_test, y_pred)

Métricas de evaluación:
Precision: 0.7710
Recall: 0.7614
F1-score: 0.7530


In [12]:
show_report(y_test, y_pred)

{'0': {'precision': 0.7377049180327869, 'recall': 0.9, 'f1-score': 0.8108108108108109, 'support': 50.0}, '1': {'precision': 0.8148148148148148, 'recall': 0.5789473684210527, 'f1-score': 0.6769230769230768, 'support': 38.0}, 'accuracy': 0.7613636363636364, 'macro avg': {'precision': 0.7762598664238008, 'recall': 0.7394736842105263, 'f1-score': 0.7438669438669439, 'support': 88.0}, 'weighted avg': {'precision': 0.7710023734613898, 'recall': 0.7613636363636364, 'f1-score': 0.752995652995653, 'support': 88.0}}
              precision    recall  f1-score   support

           0       0.74      0.90      0.81        50
           1       0.81      0.58      0.68        38

    accuracy                           0.76        88
   macro avg       0.78      0.74      0.74        88
weighted avg       0.77      0.76      0.75        88

