In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Subset
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
import numpy as np

# Step 1: Prepare CIFAR-10 Dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to 224x224 (for ResNet)
    transforms.RandomHorizontalFlip(),  # Data augmentation
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])  # CIFAR-10 normalization
])


# Step 1: Prepare CIFAR-10 Dataset
transform_otf = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),  # Resize to 224x224 (for ResNet)
    transforms.RandomHorizontalFlip(),  # Data augmentation
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])  # CIFAR-10 normalization
])


train_dataset_orig = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_dataset = Subset(train_dataset_orig, range(0,64))
cal_dataset = Subset(train_dataset_orig, range(64,1024))

test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

X = train_dataset_orig.data
from sklearn.model_selection import train_test_split

X = train_dataset_orig.data
y = train_dataset_orig.targets

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=150)


# Step 2: Modify ResNet18 for CIFAR-10 (10 classes)
model = models.resnet18(pretrained=False)  # Do not load pretrained weights
model.fc = nn.Linear(model.fc.in_features, 10)  # Modify the final layer for CIFAR-10 (10 classes)

# Step 3: Define Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 4: Train the Model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs = 5  # You can adjust the number of epochs
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {100 * correct / total:.2f}%')

# Step 5: Fine-tuned model is ready

# Step 6: Wrap the model for MAPIE (similar to earlier)
class TorchClassifierWrapper:
    def __init__(self, model, transform=None, device=None):
        self.model = model
        self.transform = transform
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.classes_ = np.arange(10)  # CIFAR-10 has 10 classes
        # self.n_features_in_ = None  # Set in fit()

    def fit(self, X, y):
        self.classes_ = np.unique(y)
        # self.n_features_in_ = X.shape[1:]  # Image shape, e.g., (3, 224, 224)
        return self

    def predict_proba(self, X):
        from sklearn.utils.validation import check_is_fitted
        check_is_fitted(self, ["classes_"])
        
        self.model.eval()
        
        tensors = torch.stack([transform_otf(image) for image in X])

        with torch.no_grad():
            # Transform and move data to device
            if isinstance(X, np.ndarray):
                X = torch.tensor(X).float()
            X_tensor = tensors.to(self.device)
            logits = self.model(X_tensor)
            probabilities = torch.nn.functional.softmax(logits, dim=1)
        return probabilities.cpu().numpy()

    def predict(self, X):
        probabilities = self.predict_proba(X)
        return np.argmax(probabilities, axis=1)

    def __sklearn_is_fitted__(self):
        return True

    # def get_params(self, deep=True):
    #     return {"model": self.model, "transform": self.transform, "device": self.device}

    # def set_params(self, **params):
    #     for param, value in params.items():
    #         setattr(self, param, value)
    #     return self

# Wrap the model
wrapped_model = TorchClassifierWrapper(model=model, transform=transform)

# Step 7: Use MAPIE for conformal predictions
from mapie.classification import MapieClassifier

# Create a dummy fit call (MAPIE needs this to work)
X_cal, y_cal = X_test, y_test

wrapped_model.fit(X_cal, y_cal)  # Fit the model wrapper
mapie = MapieClassifier(estimator=wrapped_model, method="aps", cv="prefit")
mapie.fit(X_cal, y_cal)  # Fit MAPIE with dummy data




Files already downloaded and verified
Files already downloaded and verified




Epoch [1/5], Loss: 2.8524, Accuracy: 18.75%
Epoch [2/5], Loss: 2.3423, Accuracy: 18.75%
Epoch [3/5], Loss: 2.2734, Accuracy: 18.75%
Epoch [4/5], Loss: 1.9770, Accuracy: 26.56%
Epoch [5/5], Loss: 1.9776, Accuracy: 32.81%


In [12]:
# Step 8: Make predictions on CIFAR-10 test set

y_pred, y_pred_set = mapie.predict(X_train[:20], alpha=0.1)
y_pred_set = [
    np.where(prediction_set_row)[0].tolist()
    for prediction_set_row in y_pred_set
]
print("True values:", y_train[:5])
print("Predicted labels:", y_pred[:5])
print("Prediction sets:", y_pred_set[:5])
    # break  # Remove to process the entire dataset

True values: [5, 4, 3, 0, 4]
Predicted labels: [2 0 2 0 5]
Prediction sets: [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0, 1, 2, 3, 5, 7, 8, 9]]


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Subset
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

# Step 1: Prepare CIFAR-10 Dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to 224x224 (for ResNet)
    transforms.ToTensor(),
    # transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])  # CIFAR-10 normalization
])


# Step 1: Prepare CIFAR-10 Dataset
transform_otf = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),  # Resize to 224x224 (for ResNet)
    transforms.ToTensor(),
    # transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])  # CIFAR-10 normalization
])

train_dataset_orig = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)


Files already downloaded and verified


In [7]:
import numpy as np
from mapie.classification import MapieClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.datasets import load_wine, load_iris
from sklearn.model_selection import train_test_split
from models.classifier_model import ClassifierModel

# Generate synthetic data
X, y = make_classification(n_classes=10,n_samples=15000, n_features=15, random_state=42,n_informative=5)
# X,y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=1/4, shuffle=True)
X_train, X_cal, y_train, y_cal = train_test_split(X_train,y_train,test_size=1/3, shuffle=True)

# Fit MapieClassifiers
clf = ClassifierModel(input_dim=X.shape[1],hidden_dim=16,output_dim=len(np.unique(y)))
clf.fit(X_train, y_train)
# clf = RandomForestClassifier()
# clf.fit(X_train, y_train)
mapie = MapieClassifier(estimator=clf, method="score", cv="prefit")
mapie.fit(X_cal, y_cal)

# Predict with alpha for prediction sets
alpha = 0.05
predictions, prediction_sets = mapie.predict(X_test, alpha=alpha)

plausible_labels = [
    np.where(prediction_set_row)[0].tolist()  # Extract indices where value is True
    for prediction_set_row in prediction_sets
]


print(plausible_labels)

# Coverage Calculation
coverage = np.mean([
    y_test[i] in plausible_labels[i]
    for i in range(len(y_test))
])

# Efficiency Calculation
efficiency = np.mean([
    len(plausible_labels[i])
    for i in range(len(plausible_labels))
])

print(f"Coverage: {coverage:.2f}")
print(f"Efficiency: {efficiency:.2f}")

[[3, 6, 8], [0, 2, 3, 5, 6, 8], [4, 6, 9], [0, 1, 2, 6, 8], [0, 1, 2, 3, 4, 6, 8], [2, 6, 9], [2, 3, 5, 6, 7, 9], [1, 4, 6, 7, 8, 9], [0, 1, 2, 3, 6], [0, 2, 4, 6, 7, 9], [1, 3, 5, 6], [3, 4, 5, 7], [0, 2, 3, 4, 6, 8, 9], [2, 3, 6, 8], [1, 3, 4, 5, 7], [4, 6, 7, 9], [0, 1, 2, 3, 6, 8, 9], [2, 6, 7, 8], [2, 6, 7, 8], [5, 6, 9], [4, 6, 7, 9], [1, 4, 6], [1, 3, 5, 6], [0, 1, 2, 3, 6, 8], [4, 7, 8, 9], [2, 3, 6, 8], [1, 2, 3, 4, 5, 6, 7], [1, 2, 3], [0, 1, 2], [1, 4, 5], [0, 1, 2, 3, 6], [1, 6, 9], [4, 5, 6, 7, 9], [3, 6, 8], [0, 2, 3, 4, 8, 9], [0, 1, 2, 4, 6, 7, 8, 9], [4, 5, 6, 7, 9], [0, 2, 3, 4, 7, 8, 9], [0, 1, 2, 3, 6], [0, 2, 3, 4, 7, 8], [0, 1, 2, 3, 6, 9], [4, 5, 7], [1, 6, 8], [0, 2, 3, 4, 6, 8], [0, 1, 2, 4, 6, 9], [0, 2, 3, 8], [2, 3, 5, 6, 7, 8], [4, 8], [0, 1, 2, 3, 4, 5, 6, 7], [2, 3, 5, 6, 7, 9], [1, 4, 8], [0, 1, 2, 3, 6, 9], [0, 1, 2, 5], [0, 1, 2, 3, 6, 9], [1, 5, 6, 8], [0, 2, 3, 4, 8], [0, 1, 2, 4, 7, 9], [0, 1, 5, 6, 8], [3, 5, 7], [6, 7, 9], [4, 6, 7, 9], [0, 1, 2, 

In [9]:
mapie.estimator.gradient_updates

18800