# Emancipate from Hugging Face and its limits


This implementation of Swin is in the transformers package by huggingface.\
Originally it is developped by Microsoft.\

In the transformers library, the model is declared here 
https://github.com/huggingface/transformers/blob/v4.26.1/src/transformers/models/swin/modeling_swin.py#L1152
and is a child of SwinPreTrainedModel.\
At the end of initialization it calls PreTrainedModel.post_init method (SwinPreTrainedModel inherits from PreTrainedModel).\
The method itself calls self.init_weights() and itself the self._init_weights() from the child class


What's more I can do data augmentation.

In [1]:
import os
import json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import copy

from transformers import SwinConfig, SwinForImageClassification, ViTImageProcessor, Trainer, TrainingArguments

import torch
from torchvision.io import read_image
from torch.utils.data import Dataset, random_split, DataLoader, WeightedRandomSampler
from torch.nn import CrossEntropyLoss
from torchmetrics.classification import MulticlassConfusionMatrix
from torchvision import transforms
from torchvision.datasets import ImageFolder
import gc

import evaluate

from transformers import AutoFeatureExtractor, AutoModelForImageClassification, DefaultDataCollator

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu") # in case gpu does not work

In [3]:
#os.environ["CUDA_VISIBLE_DEVICES"]="" # in case gpu does not work and the above does not work either

---
## Config

Import config of swin alread trained on Hugging Face

In [4]:
home = os.path.join(os.getcwd(), "..")
home

'C:\\Users\\Shadow\\Documents\\Projets\\MastereIA\\DataChallenge\\histopathological\\exploration\\..'

In [5]:
config_json = os.path.join(home, "data", "model", "config", "config.json")
preprocessor_config_json = os.path.join(home, "data", "model", "config", "preprocessor_config.json")
input_train = os.path.join(home, "data", "input", "Train")
model_path = os.path.join(home, "data", "model")

I get a pre-trained model from Microsoft.

In [6]:
# possibilities from most likely to less
# microsoft/swin-base-patch4-window7-224
# microsoft/swin-base-patch4-window7-224-in22k
pretrained_model = SwinForImageClassification.from_pretrained("microsoft/swin-base-patch4-window7-224-in22k")
for param in pretrained_model.parameters():
    param.requires_grad = False

In [7]:
#pretrained_model.swin

In [8]:
with open(config_json, 'r') as f:
  swin_config_args = json.load(f)

In [9]:
configuration = SwinConfig(**swin_config_args)
#configuration

In [10]:
model = SwinForImageClassification(configuration)
#model

In [11]:
model.swin = pretrained_model.swin

In [12]:
model.classifier

Linear(in_features=1024, out_features=8, bias=True)

In [13]:
with open(preprocessor_config_json, 'r') as f:
  vit_prepro_config_args = json.load(f)

In [14]:
extractor = ViTImageProcessor(**vit_prepro_config_args)
extractor

ViTImageProcessor {
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "feature_extractor_type": "ViTFeatureExtractor",
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "ViTImageProcessor",
  "image_std": [
    0.229,
    0.224,
    0.225
  ],
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 224,
    "width": 224
  }
}

---
## Dataset

In [15]:
def parse_fn(filename):
    # <BIOPSY_PROCEDURE>_<TUMOR_CLASS>_<TUMOR_TYPE>-<YEAR>-<SLIDE_ID>-<MAG>-<SEQ>
    parsed = filename[:-4].replace('-', '_').split('_')
    parsed.append(filename)
    return parsed

def parse_type_id(filename):
    parsed = parse_fn(filename)
    label_maps = {'F':'1', 'DC':'2', 'PC':'3', 'PT':'4', 'MC':'5', 'LC':'6', 'A':'7', 'TA':'8'}
    return int(label_maps[parsed[2]])-1

In [16]:
class HistoDataset(Dataset):
    def __init__(self, img_dir, train=False, predict=False):
        self.img_dir = img_dir
        self.train = train
        self.predict = predict

    def __len__(self):
        return len(os.listdir(path=self.img_dir))

    def __getitem__(self, idx):
        filename = os.listdir(path=self.img_dir)[idx]
        img_path = os.path.join(self.img_dir, filename)
        image = read_image(img_path)
        if self.train:
            composed = transforms.Compose([transforms.ToPILImage(),
                                transforms.ColorJitter(brightness=.1, contrast=0, saturation=.1, hue=.1), 
                                transforms.RandomHorizontalFlip(0.3), 
                                transforms.RandomVerticalFlip(0.3),
                                transforms.RandomRotation(30),
                                transforms.TrivialAugmentWide(),
                                transforms.RandomApply(transforms=[transforms.RandomResizedCrop(size=(460, 700))], p=0.5),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=extractor.image_mean, std=extractor.image_std), 
                                transforms.RandomErasing(0.5),
                                ])
            image = composed(image)
        if self.predict:
            label='Unknown'
        else:
            label = parse_type_id(filename)
        image_features = extractor(image, return_tensors="pt").pixel_values.squeeze(0)
        return {'pixel_values':image_features, 'label':label, 'filename':filename}

In [17]:
histoDataset_train = HistoDataset(input_train, train=True)
#train_dataset, _, _ = random_split(histoDataset_train, [0.70, 0.20, 0.1], generator=torch.Generator().manual_seed(42))
train_dataset, _ = random_split(histoDataset_train, [0.78, 0.22], generator=torch.Generator().manual_seed(42))

In [18]:
histoDataset_eval_chal = HistoDataset(input_train, train=False)
#_, eval_dataset, challenge_dataset = random_split(histoDataset_eval_chal, [0.70, 0.20, 0.1], generator=torch.Generator().manual_seed(42))
_, eval_dataset = random_split(histoDataset_eval_chal, [0.78, 0.22], generator=torch.Generator().manual_seed(42))

In [19]:
#train_dataset[0]['pixel_values'].shape

In [20]:
#train_dataset[0]['label']

In [21]:
#plt.imshow(train_dataset[0]['pixel_values'].permute(1, 2, 0))

In [22]:
#plt.imshow(eval_dataset[0]['pixel_values'].permute(1, 2, 0))

In [23]:
len(train_dataset), len(eval_dataset)#, len(challenge_dataset)

(330, 92)

---
## Train

In [24]:
#checkpoint_path = torch.load(os.path.join(home, "data", 'output/swin/checkpoint-1300-reproduce-layer1_2_3_4_retrain/pytorch_model.bin'))
#model.load_state_dict(checkpoint_path)

In [25]:
# To unfreeze some selected layers
#len(pretrained_model.swin.encoder.layers) ==> 4 layers
for i in range(0, 4):
    for param in model.swin.encoder.layers[i].parameters():
        param.requires_grad = True

In [26]:
# To check if the earlier layers are frozen
for param in model.swin.encoder.layers[1].parameters():
    print(param.requires_grad)
    break

True


In [27]:
class HistoTrainer(Trainer):
    def compute_label_counts(self):
        """Compute the number of samples per class in the training dataset."""
        label_counts = {}
        for elt in self.train_dataset:
            label = elt['label']
            if label in label_counts:
                label_counts[label] += 1
            else:
                label_counts[label] = 1
        label_counts = dict(sorted(label_counts.items(), key=lambda x: x[0]))
        return label_counts
    
    def get_train_dataloader(self):
        train_dataset = self.train_dataset
        data_collator = self.data_collator
        if isinstance(train_dataset, Dataset):
            train_dataset = self._remove_unused_columns(train_dataset, description="training")
        else:
            data_collator = self._get_collator_with_removed_columns(data_collator, description="training")
            
        '''
        label_counts = self.compute_label_counts()
        max_samples = max(label_counts.values())
        classes_weights = {k: max_samples/v for k, v in label_counts.items()}
        #print(classes_weights)
        sample_weights = torch.DoubleTensor([classes_weights[elt['label']] for elt in train_dataset]).cuda()
        '''
        sample_weights = torch.DoubleTensor([1 for elt in train_dataset]).cuda()
        
        # Calculate class weights based on the frequency of each class in the training dataset
        #sample_weights = torch.tensor(list(classes_weights.values()), dtype=torch.float).cuda()
        # Create a weighted sampler that oversamples the minority class during training
        train_sampler = WeightedRandomSampler(sample_weights, len(self.train_dataset))

        train_dataloader = DataLoader(
            train_dataset,
            batch_size=self._train_batch_size,
            sampler=train_sampler,
            collate_fn=data_collator,
            drop_last=self.args.dataloader_drop_last,
            num_workers=self.args.dataloader_num_workers,
            pin_memory=self.args.dataloader_pin_memory,
        )
        return train_dataloader
    
    # If weights for classes are needed
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get('logits')
        # compute custom loss
        loss_fct = CrossEntropyLoss(weight=torch.tensor([0.2, 0.1, 0.1, 0.1, 0.2, 0.1, 0.1, 0.1]).cuda())
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

In [28]:
training_args = TrainingArguments(
    output_dir='../data/output/swin',          # output directory
    num_train_epochs=128,              # total # of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=16,   # batch size for evaluation
    #warmup_ratio=0.01,
    warmup_steps=50,                # number of warmup steps for learning rate scheduler
    logging_dir='../data/log',            # directory for storing logs
    logging_strategy="steps",
    logging_steps=50,
    evaluation_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=100,
    remove_unused_columns = False,
    weight_decay=0.01,
    learning_rate=5e-5,
    gradient_accumulation_steps=4,
    #ignore_data_skip=True,
    #resume_from_checkpoint=True,
    #no_cuda=True
)

trainer = Trainer( #HistoTrainer
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=eval_dataset,            # evaluation dataset
    #data_collator=DefaultDataCollator(return_tensors="pt"),
    #tokenizer=extractor,
)

In [29]:
#trainer.compute_label_counts()

in our prediction file (computed with hugging face which has good results) :\
0    17\
1    54\
2    29\
3    25\
4    25\
6    56\
7     1

In [30]:
# to avoid errors such as  :
# OutOfMemoryError: CUDA out of memory. Tried to allocate 14.00 MiB (GPU 0; 8.00 GiB total capacity; 6.97 GiB already allocated; 0 bytes free; 7.34 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
torch.cuda.empty_cache()
gc.collect()

48

In [31]:
#checkpoint_path = os.path.join(home, "data", 'output/swin/checkpoint-1300')
#trainer.train(checkpoint_path)
#trainer.train()

In [None]:
#trainer.save_model(model_path)

## Evaluation

Evaluating how good the training went.

### Confusion matrices

In [46]:
# in case of overfitting to check another checkpoint
checkpoint_path = torch.load(os.path.join(home, "data", 'output/swin/checkpoint-5300/pytorch_model.bin'))
model.load_state_dict(checkpoint_path)
# Set the model to evaluation mode
model.eval()

SwinForImageClassification(
  (swin): SwinModel(
    (embeddings): SwinEmbeddings(
      (patch_embeddings): SwinPatchEmbeddings(
        (projection): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      )
      (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): SwinEncoder(
      (layers): ModuleList(
        (0): SwinStage(
          (blocks): ModuleList(
            (0): SwinLayer(
              (layernorm_before): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
              (attention): SwinAttention(
                (self): SwinSelfAttention(
                  (query): Linear(in_features=128, out_features=128, bias=True)
                  (key): Linear(in_features=128, out_features=128, bias=True)
                  (value): Linear(in_features=128, out_features=128, bias=True)
                  (dropout): Dropout(p=0.0, inplace=False)
                )
                (output): SwinSelfOutput(

In [39]:
train_labels = []
train_preds = []
with torch.no_grad():
    for data in train_dataset:
        train_labels.append(data['label'])
        x = data['pixel_values'][None, :].to(device) # important if inputs and weights are not on the same processing unit
        train_preds.append(model(x).logits.argmax(-1).item())

In [40]:
target = torch.tensor(train_labels)
preds = torch.tensor(train_preds)
metric = MulticlassConfusionMatrix(num_classes=8)
metric(preds, target)

tensor([[40,  1,  1,  7,  9,  2,  2,  0],
        [ 1, 18,  6,  0,  2,  4,  4,  0],
        [ 4,  3, 35,  5,  7,  7,  5,  0],
        [ 6,  0,  3, 24,  5,  3,  2,  0],
        [ 3,  6,  1,  3, 25,  3,  4,  0],
        [ 2,  0,  2,  1,  0,  8,  0,  0],
        [ 7,  1,  7,  4,  1,  1, 34,  0],
        [ 2,  0,  1,  2,  0,  0,  1,  5]])

It went pretty well !

In [49]:
eval_labels = []
eval_preds = []
with torch.no_grad():
    for data in eval_dataset:
        eval_labels.append(data['label'])
        x = data['pixel_values'][None, :].to(device) # important if inputs and weights are not on the same processing unit
        eval_preds.append(model(x).logits.argmax(-1).item())

In [50]:
metric = MulticlassConfusionMatrix(num_classes=8)
metric(torch.tensor(eval_preds), torch.tensor(eval_labels))

tensor([[10,  0,  0,  0,  0,  0,  0,  0],
        [ 0, 11,  0,  0,  0,  0,  0,  0],
        [ 0,  0, 26,  0,  0,  0,  0,  0],
        [ 1,  0,  0, 13,  0,  0,  0,  0],
        [ 0,  0,  0,  0, 13,  0,  0,  0],
        [ 0,  0,  0,  0,  0,  3,  0,  0],
        [ 0,  0,  0,  1,  0,  0,  9,  0],
        [ 0,  0,  0,  0,  0,  0,  0,  5]])

### Validation metrics bundle

Obtain the results on the autotrain page :

    Loss: 0.179
    Accuracy: 0.966
    Macro F1: 0.959
    Micro F1: 0.966
    Weighted F1: 0.966
    Macro Precision: 0.969
    Micro Precision: 0.966
    Weighted Precision: 0.969
    Macro Recall: 0.954
    Micro Recall: 0.966
    Weighted Recall: 0.966


In [51]:
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load("recall")


def compute_metrics (labels, preds):
    results = {}
    results.update(accuracy_metric.compute(predictions=preds, references = labels))
    results.update({'f1_macro':f1_metric.compute(predictions=preds, references = labels, average="macro")['f1']})
    results.update({'f1_micro':f1_metric.compute(predictions=preds, references = labels, average="micro")['f1']})
    results.update({'f1_weighted':f1_metric.compute(predictions=preds, references = labels, average="weighted")['f1']})
    results.update({'precision_macro':precision_metric.compute(predictions=preds, references = labels, average="macro")['precision']})
    results.update({'precision_micro':precision_metric.compute(predictions=preds, references = labels, average="micro")['precision']})
    results.update({'precision_weighted':precision_metric.compute(predictions=preds, references = labels, average="weighted")['precision']})
    results.update({'recall_macro':recall_metric.compute(predictions=preds, references = labels, average="macro")['recall']})
    results.update({'recall_micro':recall_metric.compute(predictions=preds, references = labels, average="micro")['recall']})
    results.update({'recall_weighted':recall_metric.compute(predictions=preds, references = labels, average="weighted")['recall']})
    return results

compute_metrics(torch.tensor(train_labels), torch.tensor(train_preds))

{'accuracy': 1.0,
 'f1_macro': 1.0,
 'f1_micro': 1.0,
 'f1_weighted': 1.0,
 'precision_macro': 1.0,
 'precision_micro': 1.0,
 'precision_weighted': 1.0,
 'recall_macro': 1.0,
 'recall_micro': 1.0,
 'recall_weighted': 1.0}

In [52]:
compute_metrics(torch.tensor(eval_labels), torch.tensor(eval_preds))

{'accuracy': 0.9782608695652174,
 'f1_macro': 0.9785401002506267,
 'f1_micro': 0.9782608695652174,
 'f1_weighted': 0.9782336275471287,
 'precision_macro': 0.9797077922077922,
 'precision_micro': 0.9782608695652174,
 'precision_weighted': 0.9792490118577075,
 'recall_macro': 0.9785714285714286,
 'recall_micro': 0.9782608695652174,
 'recall_weighted': 0.9782608695652174}

## Prediction time

In [47]:
wd = os.path.join(os.getcwd(), '..')
data = os.path.join(wd, 'data', 'input')
image_test_path = os.path.join(data, 'Test')

# Test images
images_test = os.listdir(path=image_test_path)
submission_path = os.path.join(wd, 'data', 'output', 'submission', 'pred_swim_20230307_0002.csv')

def parseTest_fn(filename):
    # <BIOPSY_PROCEDURE>_<ID>
    parsed = filename[:-4].split('_')
    parsed.append(filename)
    return parsed

columns = ['procedure', 'id', 'filename']

df_test = pd.DataFrame(list(map(parseTest_fn, images_test)), columns=columns)
df_test['type_id'] = 0
df_test.head()

Unnamed: 0,procedure,id,filename,type_id
0,SOB,1,SOB_1.png,0
1,SOB,10,SOB_10.png,0
2,SOB,100,SOB_100.png,0
3,SOB,101,SOB_101.png,0
4,SOB,102,SOB_102.png,0


In [48]:
test_data = HistoDataset(image_test_path, predict=True)
types_test = []
for data in test_data:
    #inputs = extractor(data['pixel_values'], return_tensors="pt")
    with torch.no_grad():
        #logits = model(inputs['pixel_values'].cuda()).logits
        # predicted_label = logits.argmax(-1).item()
        x = data['pixel_values'][None, :].to(device)
        predicted_label = model(x).logits.argmax(-1).item()
        types_test.append(model.config.id2label[predicted_label])
    
    #train_labels.append(data['label'])
    #x = data['pixel_values'][None, :].to(device) # important if inputs and weights are not on the same processing unit
    #train_preds.append(model(x).logits.argmax(-1).item())

In [49]:
df_test['type_id'] = types_test
df_pred = df_test[['id', 'type_id']]
df_pred.to_csv(submission_path, index=False, header=False)

## Compare Auto and Microsoft base

Where are the differences in weights ?

In [66]:
#pretrained_model = SwinForImageClassification.from_pretrained("microsoft/swin-base-patch4-window7-224")
#pretrained_model = SwinForImageClassification.from_pretrained("microsoft/swin-base-patch4-window7-224-in22k")

In [108]:
#checkpoint_path = torch.load(os.path.join(home, "data", 'output/swin/checkpoint-200/pytorch_model.bin'))
#model.load_state_dict(checkpoint_path)

<All keys matched successfully>

In [109]:
#access_token = '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'
#autotrain_model = AutoModelForImageClassification.from_pretrained("JoffreyMa/autotrain-histopathological_image_classification-3393093038", use_auth_token=access_token)

loading configuration file config.json from cache at C:\Users\Shadow/.cache\huggingface\hub\models--JoffreyMa--autotrain-histopathological_image_classification-3393093038\snapshots\ce7023a6fa2db96df4164220b0bca00a61b32548\config.json
Model config SwinConfig {
  "_name_or_path": "JoffreyMa/autotrain-histopathological_image_classification-3393093038",
  "architectures": [
    "SwinForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "depths": [
    2,
    2,
    18,
    2
  ],
  "drop_path_rate": 0.1,
  "embed_dim": 128,
  "encoder_stride": 32,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 1024,
  "id2label": {
    "0": "1",
    "1": "2",
    "2": "3",
    "3": "4",
    "4": "5",
    "5": "6",
    "6": "7",
    "7": "8"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "label2id": {
    "1": "0",
    "2": "1",
    "3": "2",
    "4": "3",
    "5": "4",
    "6": "5",
    "7": "6",
    "8": "7"
  },
  "layer_norm_eps": 1e-05,
  "max_length": 

In [110]:
for i in range(4):
    # Get the model parameters
    params1 = model.swin.encoder.layers[i].state_dict()
    params2 = autotrain_model.swin.encoder.layers[i].state_dict()

    # Compare the weight tensors layer by layer
    means = []
    for layer_name, _ in params1.items():
        diff = torch.abs(params1[layer_name].cpu() - params2[layer_name]).float()
        mean_diff = torch.mean(diff)
        means.append(mean_diff)
        #print(f"Layer name: {layer_name}")
        #print(f"Mean absolute difference: {mean_diff:.6f}")
    print(f'For layer {i} : {np.mean(means)}')

For layer 0 : 0.00018857356917578727
For layer 1 : 0.0001739619328873232
For layer 2 : 0.00023301976034417748
For layer 3 : 0.0006321489927358925


In [43]:
# Load CSV file into pandas dataframe
df = pd.read_csv( os.path.join(home, "data/output/submission/pred_swim_20230305_0925.csv"), header=None)

# Rename columns
df.columns = ["col1", "col2"]

# Count the number of rows for each distinct value in the second column
counts = df.groupby("col2").size()

# Print the result
print(counts)

col2
1    47
2    28
3    35
4    20
5    38
6     1
7    36
8     2
dtype: int64


In [44]:
# Load CSV file into pandas dataframe
df = pd.read_csv( os.path.join(home, "data/output/submission/pred_swim_20231002.csv"), header=None)

# Rename columns
df.columns = ["col1", "col2"]

# Count the number of rows for each distinct value in the second column
counts = df.groupby("col2").size()

# Print the result
print(counts)

col2
1    17
2    54
3    29
4    25
5    25
7    56
8     1
dtype: int64


In [50]:
# Load CSV file into pandas dataframe
df = pd.read_csv( os.path.join(home, "data/output/submission/pred_swim_20230307_0002.csv"), header=None)

# Rename columns
df.columns = ["col1", "col2"]

# Count the number of rows for each distinct value in the second column
counts = df.groupby("col2").size()

# Print the result
print(counts)

col2
1    22
2    50
3    32
4    20
5    17
7    65
8     1
dtype: int64
