In [1]:
import polars as pl
import numpy as np
import pandas as pd
import altair as alt
import os
import wget
import zipfile

from os.path import exists
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

from mapie.classification import MapieClassifier
from mapie.metrics import classification_coverage_score
from mapie.metrics import classification_coverage_score_v2
from mapie.metrics import classification_mean_width_score

In [2]:
# Local path of the extracted Excel file and the UCI archive location it comes from.
beans = "./DryBeanDataset/Dry_Bean_Dataset.xlsx"
base = "https://archive.ics.uci.edu/ml/machine-learning-databases/"
dataset_number = "00602"

# Download and unpack the archive only when the Excel file is not present yet.
if not exists(beans):
    filename = "DryBeanDataset.zip"
    url = f"{base}{dataset_number}/{filename}"
    # wget.download returns the path it actually wrote; use that path instead of
    # assuming the file landed under `filename` (wget renames on a name clash).
    downloaded = wget.download(url)
    try:
        with zipfile.ZipFile(downloaded, "r") as zip_ref:
            zip_ref.extractall('./')
    finally:
        # Remove the archive even when extraction fails, so no stale zip is left behind.
        os.remove(downloaded)

# Read from the same path the existence check above used.
bean_df = pl.read_excel(beans)
bean_df = bean_df.to_pandas()

# Encode the string class labels as integers for scikit-learn.
le = LabelEncoder()
bean_df["Class"] = le.fit_transform(bean_df["Class"])
# LabelEncoder assigns codes in sorted label order, so le.classes_[i] is the name
# of the class encoded as i (unique() on the raw column is in order of appearance).
names = le.classes_

y = bean_df["Class"]
X = bean_df.drop(columns="Class")

# Successive splits: 10000 rows for training, then 1000 for testing and 1000 for
# calibration; whatever is left over becomes the "new" data to predict on.
X_train, X_rest1, y_train, y_rest1 = train_test_split(X, y, train_size=10000, random_state=2)
X_test, X_rest2, y_test, y_rest2 = train_test_split(X_rest1, y_rest1, train_size=1000, random_state=42)
X_calib, X_new, y_calib, y_new = train_test_split(X_rest2, y_rest2, train_size=1000, random_state=42)

In [3]:
# Train a Gaussian naive Bayes classifier on the training split,
# then predict labels for the held-out test split.
model = GaussianNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [4]:
# For APS we first need to look at the coverage per class, not only the average over all classes.

In [5]:
# Check the container type of y_new; class_wise_performance below reads y_new.values.
type(y_new)

pandas.core.series.Series

In [6]:
# The goal is coverage close to the 95 % target - not coverage as high as possible - while keeping the average set size small.
# One could try to artificially lower the coverage of BOMBAY (this class reaches 100 % coverage); however, the score method and its marginal coverage are not adaptive.
# Adaptive prediction sets are obtained with a different approach.

In [7]:
# Adaptivity:
# A conformal prediction algorithm is adaptive if it achieves not only marginal coverage but also (approx.) conditional coverage.

In [9]:
# APS
# In this approach we calculate non-conformity scores on the calibration set, as for the score method, to get the threshold.
# Then we compute all class probabilities for each data point and, to decide which classes make it into the final prediction set, we add up the class probabilities in descending order, e.g.

#cat
#0.5 probability the image is a cat
#lion
#0.3
#dog
#0.1

#if threshold from calibration set is 95 %
#all three classes would be in the prediction set for this specific prediction data point.

# Usually the open question is whether to include or leave out the last class, because the cumulative probability either ends up above the threshold (with it) or stays below the threshold (without it).

#e.g.
#cat
#0.5 probability the image is a cat
#lion
#0.3
#dog
#0.1
#hamster
#0.06

#The latter would bring us above the threshold, and it is always a question whether we want this or not. Calculation examples indicate that including it
# leads to results at least as good as the score method, bringing most classes above or close to the threshold, but the sets are usually larger than with the standard score method; score has the smallest sets on average, but the weakest coverage guarantee.
# Leaving the last one out usually leads to worse results than score and should not be done; randomizing is to be preferred, as the coverage guarantee then holds with an added sample-size correction (see page 52).


In [10]:
# Wrap the already-fitted model (cv="prefit") in a conformal classifier using the
# adaptive prediction sets ("aps") method.
# random_state seeds MAPIE's random number generator so that re-running the notebook
# (in particular the include_last_label="randomized" prediction) gives the same sets.
mapie_score = MapieClassifier(estimator=model, cv="prefit", method="aps", random_state=42)

In [11]:
# Calibrate the conformal classifier on the calibration split.
mapie_score.fit(X=X_calib, y=y_calib)

In [12]:
# Prediction sets for the new data at alpha = 0.05, always including the last label.
y_pred, y_set = mapie_score.predict(
    X_new,
    include_last_label=True,
    alpha=0.05,
)
# Drop length-1 axes (presumably the axis for the single alpha - confirm against the MAPIE docs).
y_set = y_set.squeeze()

In [13]:
def class_wise_performance(y_new, y_set, classes):
    """Compute coverage and average prediction-set size separately for each class.

    Parameters
    ----------
    y_new : array-like of int
        True labels, given as integer codes where code ``i`` refers to
        ``classes[i]``. A pandas Series, NumPy array or list is accepted.
    y_set : numpy.ndarray
        Prediction sets, one row per entry of ``y_new`` (rows are selected
        with a boolean mask built from ``y_new``).
    classes : sequence
        Class names; position ``i`` names the class with integer code ``i``.

    Returns
    -------
    pandas.DataFrame
        One row per class (index ``0..len(classes)-1``) with the columns
        ``"class"``, ``"coverage"`` and ``"avg set size"``. Classes that do
        not occur in ``y_new`` get ``NaN`` for both metrics.
    """
    labels = np.asarray(y_new)
    records = []
    for code, class_name in enumerate(classes):
        in_class = labels == code
        if in_class.any():
            class_sets = y_set[in_class]
            coverage = classification_coverage_score(labels[in_class], class_sets)
            avg_size = classification_mean_width_score(class_sets)
        else:
            # No samples of this class: both metrics are undefined, so report NaN
            # rather than handing empty arrays to the metric functions.
            coverage = np.nan
            avg_size = np.nan
        records.append(
            {"class": class_name, "coverage": coverage, "avg set size": avg_size}
        )
    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(records, columns=["class", "coverage", "avg set size"])

In [14]:
# Overall metrics across all new samples: mean prediction-set size and coverage.
setsize = classification_mean_width_score(y_set)
cov = classification_coverage_score(y_new, y_set)

In [15]:
# Overall coverage on the new data (compare with the 1 - alpha = 0.95 target).
cov

0.9689633767846059

In [16]:
# Average number of labels per prediction set on the new data.
setsize

1.914338919925512

In [17]:
# Coverage and average set size broken down per class; le.classes_ supplies the class names.
class_wise_performance(y_new, y_set, le.classes_)

Unnamed: 0,class,coverage,avg set size
0,BARBUNYA,0.931034,2.206897
1,BOMBAY,1.0,1.0
2,CALI,0.971591,2.153409
3,DERMASON,0.982759,1.512315
4,HOROZ,0.942623,1.97541
5,SEKER,0.982979,2.114894
6,SIRA,0.974277,2.131833


In [None]:
# Leave the last label out (include_last_label=False)

In [19]:
# Same prediction as before, but the last label is never added to the set.
y_pred, y_set = mapie_score.predict(
    X_new,
    include_last_label=False,
    alpha=0.05,
)
y_set = y_set.squeeze()

# Overall coverage and mean set size, followed by the per-class breakdown.
setsize = classification_mean_width_score(y_set)
cov = classification_coverage_score(y_new, y_set)
print(cov, setsize, sep="\n")
print(class_wise_performance(y_new, y_set, le.classes_))

0.8150217256362507
1.1744258224705153
      class  coverage  avg set size
0  BARBUNYA  0.534483      1.218391
1    BOMBAY  1.000000      1.000000
2      CALI  0.846591      1.153409
3  DERMASON  0.921182      1.162562
4     HOROZ  0.803279      1.270492
5     SEKER  0.731915      1.131915
6      SIRA  0.848875      1.170418


In [None]:
# The sets are really small, but coverage is bad (about 0.82 overall, well below the 95 % target).

In [20]:
# Same prediction again, this time deciding at random whether the last label is included.
y_pred, y_set = mapie_score.predict(
    X_new,
    include_last_label="randomized",
    alpha=0.05,
)
y_set = y_set.squeeze()

# Overall coverage and mean set size, followed by the per-class breakdown.
setsize = classification_mean_width_score(y_set)
cov = classification_coverage_score(y_new, y_set)
print(cov, setsize, sep="\n")
print(class_wise_performance(y_new, y_set, le.classes_))

0.962756052141527
1.899441340782123
      class  coverage  avg set size
0  BARBUNYA  0.931034      2.224138
1    BOMBAY  1.000000      2.000000
2      CALI  0.971591      2.176136
3  DERMASON  0.972906      1.450739
4     HOROZ  0.934426      1.987705
5     SEKER  0.970213      2.000000
6      SIRA  0.971061      1.980707
