In [1]:
import collections
from river import datasets

# Credit-card dataset shipped with river. Iterating it yields 2-tuples; the
# second element of each tuple is what gets counted below.
X_y = datasets.CreditCard()

# Tally how many observations carry each target value.
counts = collections.Counter(y for _, y in X_y)

# The grand total does not change inside the loop, so compute it once
# instead of re-summing the counter for every printed row.
total = sum(counts.values())

# One line per target value: absolute count and its share of the stream.
for c, count in counts.items():
    print(f'{c}: {count} ({count / total:.5%})')

Downloading https://maxhalford.github.io/files/datasets/creditcardfraud.zip (65.95 MB)
Uncompressing into /home/jbris/river_data/CreditCard
0: 284315 (99.82725%)
1: 492 (0.17275%)


In [2]:
from river import linear_model
from river import metrics
from river import evaluate
from river import preprocessing


# Fresh handle on the credit-card dataset; the later cells reuse this X_y.
X_y = datasets.CreditCard()

# Baseline pipeline: standard scaling followed by a logistic regression
# left at its default settings.
scaler = preprocessing.StandardScaler()
classifier = linear_model.LogisticRegression()
model = scaler | classifier

# Score with ROC AUC. The call is kept as the cell's last expression so the
# notebook displays the resulting metric.
metric = metrics.ROCAUC()
evaluate.progressive_val_score(X_y, model, metric)

ROCAUC: 89.11%

In [3]:
from river import optim

# Same scaler + logistic regression pipeline as the baseline, but with a log
# loss built with weight_pos=5.
# NOTE(review): weight_pos presumably up-weights the positive class — confirm
# against the river optim.losses documentation.
scaler = preprocessing.StandardScaler()
weighted_log_loss = optim.losses.Log(weight_pos=5)
classifier = linear_model.LogisticRegression(loss=weighted_log_loss)
model = scaler | classifier

# New metric object so the score is not mixed with a previous run.
metric = metrics.ROCAUC()
evaluate.progressive_val_score(X_y, model, metric)

ROCAUC: 91.43%

In [4]:
# Scaler + logistic regression pipeline trained with a binary focal loss.
# The two loss arguments are passed positionally, exactly as before.
# NOTE(review): presumably (gamma, beta) — confirm against the river docs.
scaler = preprocessing.StandardScaler()
focal_loss = optim.losses.BinaryFocalLoss(2, 1)
classifier = linear_model.LogisticRegression(loss=focal_loss)
model = scaler | classifier

# New metric object so the score is not mixed with a previous run.
metric = metrics.ROCAUC()
evaluate.progressive_val_score(X_y, model, metric)

ROCAUC: 91.31%

In [5]:
from river import imblearn

# Scaler followed by a logistic regression wrapped in a random under-sampler.
scaler = preprocessing.StandardScaler()
classifier = linear_model.LogisticRegression()
under_sampler = imblearn.RandomUnderSampler(
    classifier=classifier,
    # Requested class distribution: 80% label 0, 20% label 1.
    desired_dist={0: 0.8, 1: 0.2},
    # Fixed seed so the random sampling is repeatable.
    seed=42,
)
model = scaler | under_sampler

# New metric object so the score is not mixed with a previous run.
metric = metrics.ROCAUC()
evaluate.progressive_val_score(X_y, model, metric)

ROCAUC: 94.75%

In [6]:
# Scaler followed by a logistic regression wrapped in a random over-sampler.
scaler = preprocessing.StandardScaler()
classifier = linear_model.LogisticRegression()
over_sampler = imblearn.RandomOverSampler(
    classifier=classifier,
    # Requested class distribution: 80% label 0, 20% label 1.
    desired_dist={0: 0.8, 1: 0.2},
    # Fixed seed so the random sampling is repeatable.
    seed=42,
)
model = scaler | over_sampler

# New metric object so the score is not mixed with a previous run.
metric = metrics.ROCAUC()
evaluate.progressive_val_score(X_y, model, metric)

ROCAUC: 91.71%

In [7]:
# Scaler followed by a logistic regression wrapped in a RandomSampler, which
# takes a sampling_rate in addition to the desired class distribution.
scaler = preprocessing.StandardScaler()
classifier = linear_model.LogisticRegression()
hybrid_sampler = imblearn.RandomSampler(
    classifier=classifier,
    # Requested class distribution: 80% label 0, 20% label 1.
    desired_dist={0: 0.8, 1: 0.2},
    # NOTE(review): presumably the fraction of the stream used for training
    # (1% here) — confirm against the river imblearn documentation.
    sampling_rate=0.01,
    # Fixed seed so the random sampling is repeatable.
    seed=42,
)
model = scaler | hybrid_sampler

# New metric object so the score is not mixed with a previous run.
metric = metrics.ROCAUC()
evaluate.progressive_val_score(X_y, model, metric)

ROCAUC: 94.71%

In [8]:
# Combines two earlier ideas: a random under-sampler wrapped around a logistic
# regression whose log loss is built with weight_pos=5.
scaler = preprocessing.StandardScaler()
weighted_log_loss = optim.losses.Log(weight_pos=5)
classifier = linear_model.LogisticRegression(loss=weighted_log_loss)
under_sampler = imblearn.RandomUnderSampler(
    classifier=classifier,
    # Requested class distribution: 80% label 0, 20% label 1.
    desired_dist={0: 0.8, 1: 0.2},
    # Fixed seed so the random sampling is repeatable.
    seed=42,
)
model = scaler | under_sampler

# New metric object so the score is not mixed with a previous run.
metric = metrics.ROCAUC()
evaluate.progressive_val_score(X_y, model, metric)

ROCAUC: 96.52%