# Diff Test Val

Dieses Notebook ist ein Testnotebook, um den Unterschied zwischen der Testpartition und den Trainings- bzw. Validierungspartitionen zu zeigen.
Die Methode, die hier angewendet wurde, lässt sich wie folgt beschreiben:
- ResNet50 aus PyTorch oder unser trainiertes Modell auf den Daten verwenden
- Den zweitletzten Layer, sprich den Layer vor dem Classifier, als Output bzw. Featuremap verwenden
- Featuremap mittels PCA auf 2 Dimensionen reduzieren
- Die 2D Featuremap plotten

In [1]:
%load_ext autoreload
%autoreload 2

import os
# Remember the directory the kernel started in. The chdir calls below are
# relative, so without this anchor every re-run of the cell would move the
# working directory again (and on JupyterHub the second run would take the
# "local" branch because the cwd no longer matches).
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(_NOTEBOOK_START_DIR)

if _NOTEBOOK_START_DIR == '/home/jovyan/work': # jhub
    # Enter the project folder and run the `reqs` make target.
    os.chdir("24FS_I4DS27/main/")
    os.system("make reqs")
else: # local
    # Step one level up so the relative paths used further down
    # ("models/...", "data/...") and the `src` imports resolve.
    os.chdir("../")

In [2]:
import torch 
import torchvision
import plotly.express as px
import plotly.graph_objects as go
from tqdm import tqdm
from src.data.mri import MRIDataModule
from src.data.covidx import COVIDXDataModule
from src.models.imageclassifier import ImageClassifier
from sklearn.decomposition import PCA

# Pick the compute backend in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"

# Passed to both datamodules below as `num_workers` / `batch_size`.
NUM_WORKERS = 0
BATCH_SIZE = 32

print(f"Device: {DEVICE}")

Device: mps


In [3]:
# Preprocessing handed to both datamodules: a single antialiased resize to 224x224.
transform = torchvision.transforms.Compose(
    transforms=[torchvision.transforms.Resize(size=(224, 224), antialias=True)]
)

class passthrough(torch.nn.Module):
    """Identity module: hands its input back unchanged.

    In this notebook it replaces a network's `fc` layer, so the model
    outputs whatever would otherwise have been fed into that layer.
    """

    def forward(self, x):
        # No parameters and no computation: the very same object is returned.
        return x

def get_model(modelname, ours=False):
    """Build a feature extractor by replacing a model's `fc` head with `passthrough`.

    Args:
        modelname: Architecture name. With `ours=False` it is passed to
            `torchvision.models.get_model`; with `ours=True` it is used for
            the checkpoint path and forwarded to `ImageClassifier`.
        ours: If True, load `ImageClassifier` from
            `models/{modelname}-covidx_data/model.ckpt`. Otherwise load the
            torchvision model with `weights="DEFAULT"`.

    Returns:
        The model in eval mode, moved to `DEVICE`, with its `fc` layer
        replaced by `passthrough()`.

    Raises:
        AttributeError: If the network has no `fc` attribute. Assigning `fc`
            on such a model would only add an unused attribute and the model
            would keep returning its classifier output.

    NOTE(review): the checkpoint path always contains `covidx_data`, whatever
    dataset the features are later extracted from — confirm this is intended.
    """
    if ours:
        model = ImageClassifier.load_from_checkpoint(
            checkpoint_path=f"models/{modelname}-covidx_data/model.ckpt",
            modelname=modelname,
            output_size=1,
            p_dropout_classifier=0.0,
            lr=0.0,
            weight_decay=0.0,
        )
        # The wrapped network is reached through `.model` on the classifier.
        backbone = model.model
    else:
        model = torchvision.models.get_model(modelname, weights="DEFAULT")
        backbone = model

    if not hasattr(backbone, "fc"):
        raise AttributeError(
            f"Model '{modelname}' has no 'fc' layer to replace; "
            "cannot use it as a feature extractor here."
        )
    backbone.fc = passthrough()

    model.eval()
    model = model.to(DEVICE)
    return model

def get_datamodule(dataset):
    """Create and set up the datamodule for the given dataset name.

    Args:
        dataset: Either "covidx_data" or "mri_data".

    Returns:
        The result of calling `.setup()` on the freshly built datamodule
        (presumably the datamodule itself — the rest of the notebook calls
        `train_dataloader()` / `val_dataloader()` / `test_dataloader()` on it).

    Raises:
        ValueError: If `dataset` is not one of the supported names.
    """
    if dataset == "covidx_data":
        datamodule = COVIDXDataModule(
            path="data/raw/COVIDX-CXR4",
            transform=transform,
            num_workers=NUM_WORKERS,
            batch_size=BATCH_SIZE,
            train_sample_size=0.125,
            train_shuffle=True,
            seed=42,
        ).setup()
    elif dataset == "mri_data":
        datamodule = MRIDataModule(
            path="data/raw/Brain-Tumor-MRI",
            path_processed="data/processed/Brain-Tumor-MRI",
            transform=transform,
            num_workers=NUM_WORKERS,
            batch_size=BATCH_SIZE,
            train_shuffle=True,
        ).setup()
    else:
        # Fail with a clear message instead of an UnboundLocalError on return.
        raise ValueError(
            f"Unknown dataset '{dataset}'; expected 'covidx_data' or 'mri_data'."
        )

    return datamodule

In [4]:
def _extract_features(model, dataloader):
    """Run `model` over `dataloader` and return all outputs as one CPU tensor.

    Each batch must unpack into `(inputs, _)`; only the inputs are used.
    Every batch output is moved to the CPU immediately, so the accelerator
    never has to hold the features of a whole split at once.
    """
    outputs = []
    with torch.no_grad():
        for x, _ in tqdm(dataloader, desc="Batch"):
            outputs.append(model(x.to(DEVICE)).detach().cpu())
    return torch.cat(outputs)


def _show_split_scatter(points, title, x_range, y_range):
    """Show one PC1/PC2 scatter plot of `points` with fixed axis ranges."""
    fig = px.scatter(x=points[:, 0], y=points[:, 1], title=title, labels={"x": "PC1", "y": "PC2"}, opacity=0.5)
    fig.update_layout(xaxis_range=x_range, yaxis_range=y_range)
    fig.show()


def visualize_feature_maps(modelname, our_model, datasetname, one_plot=False):
    """Plot a 2D PCA projection of model features for the train, val and test splits.

    The model from `get_model` is run over all three dataloaders of the
    datamodule from `get_datamodule`. A 2-component PCA is fitted on the
    features of all splits together and each split is projected with it.
    One scatter plot per split is shown; all plots share the same axis ranges.

    Args:
        modelname: Forwarded to `get_model`.
        our_model: Forwarded to `get_model` as `ours`.
        datasetname: Forwarded to `get_datamodule`.
        one_plot: If True, additionally show one figure overlaying all splits.

    Returns:
        None. Output is printed progress, sample counts and displayed figures.
    """
    print(f"Visualizing feature maps for {'our ' if our_model else ''}{modelname} on {datasetname}...")

    model = get_model(modelname, ours=our_model)
    datamodule = get_datamodule(datasetname)

    # Insertion order (train, val, test) drives extraction, PCA input and plots.
    loader_factories = {
        "train": datamodule.train_dataloader,
        "val": datamodule.val_dataloader,
        "test": datamodule.test_dataloader,
    }

    # Extract feature maps
    features = {}
    for split, make_loader in loader_factories.items():
        print(f"Extracting feature maps from {split}...")
        features[split] = _extract_features(model, make_loader())

    # Calculate PCA: fit once on all splits so they share one projection.
    pca = PCA(n_components=2, random_state=42)
    pca.fit(torch.cat(list(features.values()), dim=0).numpy())
    results = {split: pca.transform(feats.numpy()) for split, feats in features.items()}

    print(f"n Train:\t{results['train'].shape[0]}")
    print(f"n Val:\t\t{results['val'].shape[0]}")
    print(f"n Test:\t\t{results['test'].shape[0]}")

    # Get lims: common ranges make the per-split plots directly comparable.
    x_range = [
        min(points[:, 0].min() for points in results.values()),
        max(points[:, 0].max() for points in results.values()),
    ]
    y_range = [
        min(points[:, 1].min() for points in results.values()),
        max(points[:, 1].max() for points in results.values()),
    ]

    # Plot PCA
    titles = {
        "train": "PCA of Train Feature Maps",
        "val": "PCA of Validation Feature Maps",
        "test": "PCA of Test Feature Maps",
    }
    for split, points in results.items():
        _show_split_scatter(points, titles[split], x_range, y_range)

    if one_plot:
        fig = go.Figure()
        for key, color in zip(results, ['blue', 'green', 'red']):
            fig.add_trace(go.Scatter(x=results[key][:, 0], y=results[key][:, 1], mode='markers', name=f"{key.capitalize()} Feature Maps", marker=dict(color=color), opacity=0.5))
        fig.update_layout(title="PCA of Feature Maps", xaxis_title="PC1", yaxis_title="PC2", legend_title="Dataset", xaxis_range=x_range, yaxis_range=y_range)
        fig.show()


In [5]:
# Checkpointed ("our") ResNet50 on the COVIDx splits; one_plot=True also shows the combined overlay figure.
visualize_feature_maps("resnet50", our_model=True, datasetname="covidx_data", one_plot=True)

Visualizing feature maps for our resnet50 on covidx_data...
Extracting feature maps from train...


Batch: 100%|██████████| 266/266 [01:44<00:00,  2.54it/s]


Extracting feature maps from val...


Batch: 100%|██████████| 265/265 [01:50<00:00,  2.40it/s]


Extracting feature maps from test...


Batch: 100%|██████████| 266/266 [01:50<00:00,  2.42it/s]


n Train:	8483
n Val:		8473
n Test:		8482


In [6]:
# torchvision ResNet50 with its default weights on the COVIDx splits, for comparison with the checkpointed model.
visualize_feature_maps("resnet50", our_model=False, datasetname="covidx_data", one_plot=True)

Visualizing feature maps for resnet50 on covidx_data...
Extracting feature maps from train...


Batch: 100%|██████████| 266/266 [01:43<00:00,  2.58it/s]


Extracting feature maps from val...


Batch: 100%|██████████| 265/265 [01:30<00:00,  2.93it/s]


Extracting feature maps from test...


Batch: 100%|██████████| 266/266 [01:34<00:00,  2.82it/s]


n Train:	8483
n Val:		8473
n Test:		8482


In [7]:
# Checkpointed ("our") ResNet50 on the brain-tumor MRI splits.
# NOTE(review): get_model always loads models/<modelname>-covidx_data/model.ckpt,
# so this run uses the COVIDx checkpoint on MRI data — confirm that is intended.
visualize_feature_maps("resnet50", our_model=True, datasetname="mri_data", one_plot=True)

Visualizing feature maps for our resnet50 on mri_data...
Extracting feature maps from train...


Batch: 100%|██████████| 72/72 [00:13<00:00,  5.40it/s]


Extracting feature maps from val...


Batch: 100%|██████████| 18/18 [00:03<00:00,  5.14it/s]


Extracting feature maps from test...


Batch: 100%|██████████| 13/13 [00:02<00:00,  5.13it/s]


n Train:	2298
n Val:		572
n Test:		394


In [8]:
# torchvision ResNet50 with its default weights on the brain-tumor MRI splits.
visualize_feature_maps("resnet50", our_model=False, datasetname="mri_data", one_plot=True)

Visualizing feature maps for resnet50 on mri_data...
Extracting feature maps from train...


Batch: 100%|██████████| 72/72 [00:12<00:00,  5.57it/s]


Extracting feature maps from val...


Batch: 100%|██████████| 18/18 [00:03<00:00,  5.30it/s]


Extracting feature maps from test...


Batch: 100%|██████████| 13/13 [00:02<00:00,  5.28it/s]


n Train:	2298
n Val:		572
n Test:		394
