In [1]:
from torch import nn

from dataset_preparation import get_data

train_loader, val_loader, test_loader, classes_weights = get_data(batch_size=32)



In [2]:
classes_weights

Unnamed: 0,Class,Count,Weight
0,n02085620-Chihuahua,101,0.009901
1,n02085782-Japanese_spaniel,132,0.007576
2,n02085936-Maltese_dog,168,0.005952
3,n02086079-Pekinese,108,0.009259
4,n02086240-Shih-Tzu,147,0.006803
...,...,...,...
116,n02113978-Mexican_hairless,107,0.009346
117,n02115641-dingo,107,0.009346
118,n02115913-dhole,102,0.009804
119,n02116738-African_hunting_dog,123,0.008130


Model MultiCNNFeatureExtractor łączy cechy wyekstrahowane z czterech pretrenowanych sieci: InceptionV3, ResNet-50, MobileNetV2 i DenseNet121. Warstwy klasyfikacyjne zostały zastąpione Identity, aby uzyskać wyłącznie wektory cech. Wejście jest interpolowane do 299x299 dla InceptionV3, a pozostałe modele działają na oryginalnym rozmiarze. Cechy z każdej sieci są konkatenowane, tworząc wektor o wymiarze 7 360.












In [3]:
import torch
import torch.nn as nn
import torchvision.models as models
from sklearn.decomposition import PCA


import torch
import torch.nn as nn
import torchvision.models as models
import timm  # Import timm for additional models


class MultiCNNFeatureExtractor(nn.Module):
    def __init__(self):
        super(MultiCNNFeatureExtractor, self).__init__()

        # Load pretrained models
        self.inception_v3 = models.inception_v3(weights=models.Inception_V3_Weights.DEFAULT)
        self.inception_v3.aux_logits = False  # Disable auxiliary logits
        self.inception_v3.fc = nn.Identity()

        self.resnet50 = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        self.resnet50.fc = nn.Identity()

        self.mobilenet_v2 = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.DEFAULT)
        self.mobilenet_v2.classifier = nn.Identity()

        self.densenet121 = models.densenet121(weights=models.DenseNet121_Weights.DEFAULT)
        self.densenet121.classifier = nn.Identity()

        # Feature dimensions
        self.feature_dims = {
            "inception_v3": 2048,
            "resnet50": 2048,
            "mobilenet_v2": 1280,
            "densenet121": 1024,
        }
        self.total_features = sum(self.feature_dims.values())

    def forward(self, x):
        x_299 = torch.nn.functional.interpolate(x, size=(299, 299), mode="bilinear", align_corners=False)

        features_v3 = self.inception_v3(x_299)  # Shape: [batch_size, 2048]
        features_resnet50 = self.resnet50(x)  # Shape: [batch_size, 2048]
        features_mobilenet = self.mobilenet_v2(x)  # Shape: [batch_size, 1280]
        features_densenet = self.densenet121(x)  # Shape: [batch_size, 1024]

        # Concatenate all features
        combined_features = torch.cat(
            (features_v3, features_resnet50, features_mobilenet, features_densenet), dim=1
        )  # Shape: [batch_size, total_features]

        return combined_features



Kod trenuje MultiCNNFeatureExtractor, dodając warstwę klasyfikacyjną i optymalizując ją przez 10 epok. Po treningu warstwa klasyfikacyjna jest usuwana, a model działa jako ekstraktor cech. Cechy są następnie przekształcane za pomocą PCA (redukcja do 5000 wymiarów) i wykorzystywane do trenowania XGBoost, którego hiperparametry są optymalizowane przy użyciu GridSearchCV. Model końcowy jest oceniany na zbiorze testowym przy użyciu accuracy i F1-score.





In [4]:
import torch
import torch.nn as nn
import torchvision.models as models
from sklearn.decomposition import PCA
import torch.optim as optim
from tqdm import tqdm
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV

device = "cuda"
num_classes = 121  


# Step 1: Define the Fine-Tuning Model
class MultiCNNFineTune(nn.Module):
    def __init__(self):
        super(MultiCNNFineTune, self).__init__()
        self.feature_extractor = MultiCNNFeatureExtractor()
        self.classifier = nn.Linear(self.feature_extractor.total_features, num_classes)

    def forward(self, x):
        features = self.feature_extractor(x)
        logits = self.classifier(features)
        return logits


# Step 2: Fine-Tune with Supervised Learning
fine_tune_model = MultiCNNFineTune().to(device)
optimizer = optim.Adam(fine_tune_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()


epochs = 10
for epoch in range(epochs):
    fine_tune_model.train()
    running_loss = 0.0
    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = fine_tune_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Step 3: Remove the Classifier for Feature Extraction
feature_extractor = fine_tune_model.feature_extractor
feature_extractor.eval()


# Step 4: Precompute PCA and Train XGBoost
def precompute_pca_and_train_xgboost(feature_extractor, train_loader, val_loader, test_loader, device, n_components):
    # Extract features and apply PCA
    def extract_features(loader):
        all_features, all_labels = [], []
        with torch.no_grad():
            for inputs, labels in tqdm(loader, desc="Extracting Features"):
                inputs = inputs.to(device)
                features = feature_extractor(inputs).detach().cpu().numpy()
                all_features.append(features)
                all_labels.extend(labels.numpy())
        return np.concatenate(all_features, axis=0), np.array(all_labels)

    train_features, train_labels = extract_features(train_loader)
    val_features, val_labels = extract_features(val_loader)
    test_features, test_labels = extract_features(test_loader)

    # Fit PCA
    pca = PCA(n_components=n_components)
    train_features = pca.fit_transform(train_features)
    val_features = pca.transform(val_features)
    test_features = pca.transform(test_features)

    # Train XGBoost
    param_grid = {
        'n_estimators': [100, 200, 300],
        'max_depth': [3, 6, 9],
        'learning_rate': [0.01, 0.1, 0.2],
        'subsample': [0.7, 0.8, 1.0],
        'colsample_bytree': [0.7, 0.8, 1.0]
    }
    
    # Initialize XGBoost model
    xgb_base = xgb.XGBClassifier(objective="multi:softmax", num_class=num_classes, eval_metric="mlogloss", device="cuda")
    
    # Perform Grid Search Cross Validation
    grid_search = GridSearchCV(xgb_base, param_grid, cv=5, scoring='f1_weighted', verbose=1, n_jobs=-1)
    grid_search.fit(train_features, train_labels)
    
    # Best model from GridSearch
    best_xgb_model = grid_search.best_estimator_
    
    # Evaluate on test set
    test_predictions = best_xgb_model.predict(test_features)
    accuracy = accuracy_score(test_labels, test_predictions)
    f1 = f1_score(test_labels, test_predictions, average="weighted")
    print(f"Test Accuracy: {accuracy:.4f}, Test F1 Score: {f1:.4f}")
    return xgb_model, pca


# Execute Feature Extraction and XGBoost Training
xgb_model, pca = precompute_pca_and_train_xgboost(feature_extractor, train_loader, val_loader, test_loader, device, n_components=5000)


Epoch 1/10:   0%|          | 0/470 [00:25<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 46.00 MiB. GPU 0 has a total capacity of 16.00 GiB of which 0 bytes is free. Of the allocated memory 44.76 GiB is allocated by PyTorch, and 1.63 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)