In [1]:
from torch import nn

from dataset_preparation import get_data

# Build the train / validation / test loaders and the per-class weight table
# through the project-local `dataset_preparation.get_data` helper, using
# mini-batches of 32.
# NOTE(review): in the visible notebook `classes_weights` is only displayed in
# the next cell; it is never passed to the loss or to a sampler — confirm that
# is intended.
train_loader, val_loader, test_loader, classes_weights = get_data(batch_size=32)



In [2]:
# Show the per-class weight table returned by get_data. The captured output
# below lists Class / Count / Weight columns with row indices ending at 119.
classes_weights

Unnamed: 0,Class,Count,Weight
0,n02085620-Chihuahua,93,0.010753
1,n02085782-Japanese_spaniel,133,0.007519
2,n02085936-Maltese_dog,171,0.005848
3,n02086079-Pekinese,113,0.008850
4,n02086240-Shih-Tzu,154,0.006494
...,...,...,...
116,n02113978-Mexican_hairless,107,0.009346
117,n02115641-dingo,104,0.009615
118,n02115913-dhole,115,0.008696
119,n02116738-African_hunting_dog,108,0.009259


In [3]:
import torch
import torch.nn as nn
import torchvision.models as models
from sklearn.decomposition import PCA


class MultiCNNFeatureExtractor(nn.Module):
    """Concatenate the pooled features of four pretrained torchvision backbones.

    Every backbone is loaded with its torchvision ``DEFAULT`` weights and has
    its classification head replaced by ``nn.Identity`` so that it returns the
    feature vector that would normally feed the head.

    The attribute names are kept unchanged so existing references and
    ``state_dict`` keys keep working, but they do NOT match the architectures
    that are actually loaded:

    * ``inception_v3``         -> Inception v3   (2048 features)
    * ``inception_resnet_v2``  -> ResNet-50      (2048 features)
    * ``nasnet``               -> MobileNetV2    (1280 features)
    * ``pnasnet``              -> DenseNet-121   (1024 features)

    Attributes:
        feature_dims: Mapping from attribute name to that backbone's feature width.
        total_features: Width of the concatenated vector (sum of ``feature_dims``).
    """

    def __init__(self):
        super().__init__()

        # Inception v3. The torchvision builder refuses ``aux_logits=False``
        # together with pretrained weights, so the auxiliary head is disabled
        # after construction. Clearing the flag alone only stops the auxiliary
        # output from being *returned*; the AuxLogits branch would still be
        # evaluated on every training-mode forward pass. Dropping the module
        # removes that wasted compute and its unused parameters.
        self.inception_v3 = models.inception_v3(weights=models.Inception_V3_Weights.DEFAULT)
        self.inception_v3.aux_logits = False
        self.inception_v3.AuxLogits = None
        self.inception_v3.fc = nn.Identity()

        # ResNet-50 (stored under the legacy attribute name `inception_resnet_v2`).
        self.inception_resnet_v2 = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        self.inception_resnet_v2.fc = nn.Identity()

        # MobileNetV2 (stored under the legacy attribute name `nasnet`).
        self.nasnet = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.DEFAULT)
        self.nasnet.classifier = nn.Identity()

        # DenseNet-121 (stored under the legacy attribute name `pnasnet`).
        self.pnasnet = models.densenet121(weights=models.DenseNet121_Weights.DEFAULT)
        self.pnasnet.classifier = nn.Identity()

        # Feature width produced by each backbone once its head is an Identity.
        self.feature_dims = {
            "inception_v3": 2048,
            "inception_resnet_v2": 2048,
            "nasnet": 1280,
            "pnasnet": 1024,
        }
        self.total_features = sum(self.feature_dims.values())

    def forward(self, x):
        """Return the concatenated backbone features for an image batch.

        Args:
            x: 4-D image batch (the bilinear resize below requires a batch
                dimension, a channel dimension and two spatial dimensions).

        Returns:
            Tensor of shape ``[batch_size, total_features]`` with the features
            ordered as inception_v3, inception_resnet_v2, nasnet, pnasnet.
        """
        # Inception v3 and the ResNet-50 branch are fed a 299x299 resize of the
        # batch; the other two backbones see the batch at its original size.
        x_299 = torch.nn.functional.interpolate(x, size=(299, 299), mode="bilinear", align_corners=False)

        features_v3 = self.inception_v3(x_299)  # Shape: [batch_size, 2048]
        features_resnet_v2 = self.inception_resnet_v2(x_299)  # Shape: [batch_size, 2048]
        features_nasnet = self.nasnet(x)  # Shape: [batch_size, 1280]
        features_pnasnet = self.pnasnet(x)  # Shape: [batch_size, 1024]

        # Concatenate all features along the feature axis.
        combined_features = torch.cat(
            (features_v3, features_resnet_v2, features_nasnet, features_pnasnet), dim=1
        )  # Shape: [batch_size, total_features]

        return combined_features



In [4]:
import torch
import torch.nn as nn
import torchvision.models as models
from sklearn.decomposition import PCA
import torch.optim as optim
from tqdm import tqdm
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score, f1_score

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA instead of failing on
# the first `.to(device)` call.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Output width of the classification head (also forwarded to XGBoost).
# NOTE(review): the classes_weights table displayed earlier ends at row index
# 119, i.e. 120 rows — confirm that 121 is intended (e.g. 1-based labels) and
# not an off-by-one.
num_classes = 121  


# Step 1: Define the Fine-Tuning Model
class MultiCNNFineTune(nn.Module):
    """Multi-backbone feature extractor followed by a single linear classifier.

    The classifier maps the extractor's ``total_features`` concatenated
    features onto ``num_classes`` logits (``num_classes`` is read from the
    notebook's global configuration when the model is built).
    """

    def __init__(self):
        super().__init__()
        backbone = MultiCNNFeatureExtractor()
        self.feature_extractor = backbone
        self.classifier = nn.Linear(backbone.total_features, num_classes)

    def forward(self, x):
        """Return the raw class logits for the batch ``x``."""
        return self.classifier(self.feature_extractor(x))


# Step 2: Fine-tune the whole network (all four backbones plus the linear head)
# with supervised cross-entropy training.
fine_tune_model = MultiCNNFineTune().to(device)
# Adam receives every parameter of the model, so the pretrained backbones are
# updated together with the new classifier at the same learning rate (1e-3).
optimizer = optim.Adam(fine_tune_model.parameters(), lr=0.001)
# NOTE(review): the loss is unweighted; `classes_weights` from get_data is not
# passed here — confirm that is intended.
criterion = nn.CrossEntropyLoss()


epochs = 10
for epoch in range(epochs):
    # Switch back to training mode at the start of every epoch.
    fine_tune_model.train()
    running_loss = 0.0
    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = fine_tune_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Accumulate the per-batch loss so the epoch mean can be reported below.
        running_loss += loss.item()
    # Mean training loss over the batches of this epoch.
    # NOTE(review): nothing is stored per epoch and val_loader is not used in
    # this loop, yet the last cell of the notebook calls
    # plot_training_history(history) — `history` is never created here.
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Step 3: Keep only the (fine-tuned) feature extractor; the linear classifier
# stays attached to fine_tune_model and is simply not used from here on.
feature_extractor = fine_tune_model.feature_extractor
# Put the extractor in evaluation mode before it is used for feature extraction.
feature_extractor.eval()


# Step 4: Precompute PCA and Train XGBoost
def precompute_pca_and_train_xgboost(feature_extractor, train_loader, val_loader, test_loader, device, n_components):
    """Extract deep features, reduce them with PCA and fit an XGBoost classifier.

    The extractor is run over all three loaders without gradients, PCA is
    fitted on the training features only and then applied to the validation
    and test features, and an XGBoost classifier is trained with early
    stopping on the validation split. Test accuracy and weighted F1 are
    printed.

    Args:
        feature_extractor: Module mapping an input batch to a 2-D feature tensor.
        train_loader: Iterable of ``(inputs, labels)`` batches used for fitting.
        val_loader: Iterable of ``(inputs, labels)`` batches used for early stopping.
        test_loader: Iterable of ``(inputs, labels)`` batches used for the final report.
        device: Torch device (string or ``torch.device``) the inputs are moved
            to. XGBoost is also placed on it when it is a CUDA device, and on
            the CPU otherwise.
        n_components: Number of principal components kept by PCA.

    Returns:
        Tuple ``(xgb_model, pca)`` with the fitted classifier and the fitted PCA.

    Note:
        Reads the notebook-level global ``num_classes``.
    """

    def extract_features(loader):
        """Run the extractor over ``loader``; return (features, labels) as NumPy arrays."""
        all_features, all_labels = [], []
        with torch.no_grad():
            for inputs, labels in tqdm(loader, desc="Extracting Features"):
                inputs = inputs.to(device)
                features = feature_extractor(inputs).detach().cpu().numpy()
                all_features.append(features)
                # .cpu() is a no-op for CPU labels and keeps this working if a
                # loader ever yields labels that already live on the GPU.
                all_labels.extend(labels.cpu().numpy())
        return np.concatenate(all_features, axis=0), np.array(all_labels)

    # Features must come from evaluation mode (no dropout, frozen batch-norm
    # statistics). Do not rely on the caller having switched modes; restore the
    # previous mode afterwards so the caller's model state is left untouched.
    was_training = feature_extractor.training
    feature_extractor.eval()
    try:
        train_features, train_labels = extract_features(train_loader)
        val_features, val_labels = extract_features(val_loader)
        test_features, test_labels = extract_features(test_loader)
    finally:
        feature_extractor.train(was_training)

    # Fit PCA on the training split only, then project the other splits with it.
    pca = PCA(n_components=n_components)
    train_features = pca.fit_transform(train_features)
    val_features = pca.transform(val_features)
    test_features = pca.transform(test_features)

    # Train XGBoost on the same kind of device the caller asked for instead of
    # unconditionally on "cuda"; any non-CUDA torch device maps to the CPU.
    device_name = str(device)
    xgb_device = device_name if device_name.startswith("cuda") else "cpu"
    # `early_stopping_rounds` is an estimator (constructor) parameter; passing
    # it to fit() is deprecated and rejected by recent XGBoost releases.
    xgb_model = xgb.XGBClassifier(
        objective="multi:softmax",
        num_class=num_classes,
        eval_metric="mlogloss",
        device=xgb_device,
        early_stopping_rounds=10,
    )
    xgb_model.fit(train_features, train_labels, eval_set=[(val_features, val_labels)])

    # Evaluate XGBoost on the held-out test split.
    test_predictions = xgb_model.predict(test_features)
    accuracy = accuracy_score(test_labels, test_predictions)
    f1 = f1_score(test_labels, test_predictions, average="weighted")
    print(f"Test Accuracy: {accuracy:.4f}, Test F1 Score: {f1:.4f}")
    return xgb_model, pca


# Run feature extraction, PCA (300 components) and XGBoost training/evaluation
# with the fine-tuned extractor; keeps the fitted classifier and PCA for reuse.
xgb_model, pca = precompute_pca_and_train_xgboost(feature_extractor, train_loader, val_loader, test_loader, device, n_components=300)


Epoch 1/10:   2%|▏         | 4/236 [02:35<2:30:20, 38.88s/it]


KeyboardInterrupt: 

In [None]:

# Plot the training curves with the project-local helper.
# FIXME(review): `history` is not defined anywhere in the visible notebook (the
# training loop only prints the per-epoch loss), so this cell raises NameError
# on a fresh "Restart & Run All". The structure plot_training_history expects
# is not visible here — record it during training before calling this.
from dataset_preparation import plot_training_history  
plot_training_history(history)