In [1]:
%%capture
!pip install efficientnet_pytorch
!pip install torch_optimizer

In [2]:
import io
import random
import os
import math
import timm
from PIL import Image
from tqdm import tqdm
import gc
import pandas as pd
import multiprocessing
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import transforms
from torch.utils.data import DataLoader
from efficientnet_pytorch import EfficientNet
from torchvision.models import efficientnet_v2_s, efficientnet_v2_m, efficientnet_v2_l

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2


from torch.cuda.amp import GradScaler

  check_for_updates()


In [9]:
# Constants shared by the data-loading code in this notebook.
# Samples per batch for the train, validation and test DataLoaders.
BATCH_SIZE = 16
# NOTE(review): not referenced in the visible part of the notebook; presumably
# consumed by a training loop defined elsewhere - confirm before removing.
GRADIENT_ACCUMULATION_STEPS = 2
# Worker processes used by every DataLoader.
NUM_WORKERS = 2
# Images are resized to IMAGE_SIZE x IMAGE_SIZE before being turned into tensors.
IMAGE_SIZE = 320 
# Forwarded as `pin_memory` to every DataLoader.
PIN_MEMORY = True 
# NOTE(review): presumably an early-stopping patience; unused in the visible
# code - confirm against the training notebook.
PATIENCE = 5
# Number of stratified folds created by prepare_data().
N_FOLDS = 5

In [10]:
def calculate_dataset_stats(dataframe, image_dir):
    """Estimate per-channel mean and std of the images listed in ``dataframe``.

    Every image is resized to IMAGE_SIZE x IMAGE_SIZE and converted to a tensor,
    then statistics are taken per batch of 32 and averaged across batches.

    Note that the result is the mean of per-batch means/stds (each batch
    weighted equally), not a single pass over all pixels, so the std is an
    approximation of the true dataset std.

    Args:
        dataframe: frame with a ``filename`` column.
        image_dir: directory the filenames are relative to.

    Returns:
        (dataset_mean, dataset_std): two tensors with one entry per channel.
    """
    print("Calculating dataset mean and std...")

    class StatsDataset(torch.utils.data.Dataset):
        """Yields transformed images only (no labels)."""

        def __init__(self, df, img_dir, transform):
            self.df = df
            self.img_dir = img_dir
            self.transform = transform

        def __len__(self):
            return len(self.df)

        def __getitem__(self, idx):
            filename = self.df.iloc[idx].filename
            rgb_image = Image.open(os.path.join(self.img_dir, filename)).convert('RGB')
            return self.transform(rgb_image)

    # No augmentation and no normalisation here: only resize + tensor conversion.
    resize_to_tensor = transforms.Compose([
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
    ])
    loader = DataLoader(
        StatsDataset(dataframe, image_dir, resize_to_tensor),
        batch_size=32,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
    )

    batch_means, batch_stds = [], []
    for batch in tqdm(loader, desc="Calculating dataset statistics"):
        # Reduce over batch, height and width; keep the channel axis.
        batch_means.append(batch.mean((0, 2, 3)))
        batch_stds.append(batch.std((0, 2, 3)))

    dataset_mean = torch.stack(batch_means).mean(0)
    dataset_std = torch.stack(batch_stds).mean(0)

    print(f"Dataset mean: {dataset_mean}")
    print(f"Dataset std: {dataset_std}")

    return dataset_mean, dataset_std

class ImageDataset(torch.utils.data.Dataset):
    """Image dataset backed by a DataFrame with a ``filename`` column.

    ``mode`` selects the transform pipeline and the item layout:
      * 'train'  - resize + random augmentations + tensor + normalise
      * anything else - resize + tensor + normalise
      * 'test' items carry no 'target'; every other mode reads ``city_id``.

    ``mean`` / ``std`` default to the ImageNet statistics when not supplied.
    """

    def __init__(self, dataframe: pd.DataFrame, image_dir: str, mode: str, mean=None, std=None):
        self.df = dataframe
        self.mode = mode
        self.image_dir = image_dir

        # Fall back to the ImageNet statistics when the caller gives none.
        self.mean = [0.485, 0.456, 0.406] if mean is None else mean
        self.std = [0.229, 0.224, 0.225] if std is None else std

        resize = transforms.Resize((IMAGE_SIZE, IMAGE_SIZE))
        to_normalised_tensor = [
            transforms.ToTensor(),
            transforms.Normalize(mean=self.mean, std=self.std),
        ]

        if self.mode != 'train':
            steps = [resize, *to_normalised_tensor]
        else:
            augmentations = [
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomVerticalFlip(p=0.3),
                transforms.RandomRotation(15),
                transforms.ColorJitter(
                    brightness=0.2,
                    contrast=0.2,
                    saturation=0.2,
                    hue=0.1,
                ),
                transforms.RandomAffine(
                    degrees=10,
                    translate=(0.1, 0.1),
                    scale=(0.9, 1.1),
                ),
                transforms.RandomGrayscale(p=0.1),
            ]
            steps = [resize, *augmentations, *to_normalised_tensor]

        self.transforms = transforms.Compose(steps)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index: int):
        """Return a dict with 'image', 'filename' and (unless mode == 'test') 'target'.

        Failures are logged and re-raised; an image-loading failure is logged
        by both the inner and the outer handler.
        """
        try:
            record = self.df.iloc[index]
            image_path = os.path.join(self.image_dir, record.filename)

            try:
                image = self.transforms(Image.open(image_path).convert('RGB'))
            except Exception as load_error:
                print(f"Error loading image {image_path}: {str(load_error)}")
                raise load_error

            if self.mode == 'test':
                return {'image': image, 'filename': record.filename}

            return {
                'image': image,
                'target': record.city_id,
                'filename': record.filename,
            }
        except Exception as item_error:
            print(f"Error in __getitem__ at index {index}: {str(item_error)}")
            raise item_error


In [22]:
def load_data(train_df, test_df, train_dir, test_dir, fold=0, mean=None, std=None):
    """Build train / validation / test DataLoaders for one cross-validation fold.

    Side effect: a ``city_id`` column (label-encoded ``city``) is written into
    ``train_df`` in place.

    Args:
        train_df: frame with ``filename``, ``city`` and ``fold`` columns.
        test_df: frame with a ``filename`` column.
        train_dir, test_dir: image directories for the two frames.
        fold: rows whose ``fold`` equals this value form the validation set.
        mean, std: normalisation statistics forwarded to ImageDataset.

    Returns:
        (train_loader, valid_loader, test_loader, label_encoder, num_classes)
    """
    print("Preparing data loaders...")

    label_encoder = LabelEncoder()
    train_df['city_id'] = label_encoder.fit_transform(train_df['city'])
    num_classes = len(label_encoder.classes_)

    fold_column = train_df['fold']
    train_data = train_df[fold_column != fold].reset_index(drop=True)
    valid_data = train_df[fold_column == fold].reset_index(drop=True)

    stats = {'mean': mean, 'std': std}
    train_dataset = ImageDataset(train_data, train_dir, mode='train', **stats)
    valid_dataset = ImageDataset(valid_data, train_dir, mode='valid', **stats)
    test_dataset = ImageDataset(test_df, test_dir, mode='test', **stats)

    print(f"Train dataset size: {len(train_dataset)}")
    print(f"Validation dataset size: {len(valid_dataset)}")
    print(f"Test dataset size: {len(test_dataset)}")

    # Settings common to all three loaders.
    shared = {
        'batch_size': BATCH_SIZE,
        'num_workers': NUM_WORKERS,
        'pin_memory': PIN_MEMORY,
    }

    # Only the training loader shuffles and drops the incomplete last batch.
    train_loader = DataLoader(train_dataset, shuffle=True, drop_last=True, **shared)
    valid_loader = DataLoader(valid_dataset, shuffle=False, **shared)
    test_loader = DataLoader(test_dataset, shuffle=False, **shared)

    return train_loader, valid_loader, test_loader, label_encoder, num_classes

In [11]:
import timm
from timm import create_model

class EfficientNetB6Head(nn.Module):
    """timm ``tf_efficientnet_b6_ns`` encoder followed by an MLP classifier.

    The head narrows encoder features through 1536 -> 768 -> 384 units, each
    stage being Linear + LayerNorm + ReLU + Dropout, before the final Linear
    layer that produces ``num_classes`` logits.
    """

    def __init__(self, num_classes, dropout_rate=0.5):
        super().__init__()
        # The encoder is created with num_classes=0 and its output is fed to
        # self.head, whose input width is taken from encoder.num_features.
        self.encoder = create_model('tf_efficientnet_b6_ns', pretrained=True, num_classes=0)

        # Freeze the first 100 parameter tensors in named_parameters() order.
        for _, parameter in list(self.encoder.named_parameters())[:100]:
            parameter.requires_grad = False

        # The layer order fixes the nn.Sequential indices and therefore the
        # state_dict keys (head.0, head.1, head.4, ...); keep it stable so
        # existing checkpoints still load.
        layers = []
        in_features = self.encoder.num_features
        for out_features in (1536, 768, 384):
            layers.extend([
                nn.Linear(in_features, out_features),
                nn.LayerNorm(out_features),
                nn.ReLU(inplace=True),
                nn.Dropout(dropout_rate),
            ])
            in_features = out_features
        layers.append(nn.Linear(in_features, num_classes))

        self.head = nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        return self.head(self.encoder(x))

In [12]:
import timm
from timm import create_model
import torch.nn as nn

class EfficientNetV2SmallHead(nn.Module):
    """timm ``tf_efficientnetv2_s`` encoder followed by an MLP classifier.

    Same layout as the B6 variant but with a narrower head
    (1024 -> 512 -> 256) and fewer frozen encoder parameters (70).
    """

    # Hidden widths of the classifier head, in order.
    _HEAD_WIDTHS = (1024, 512, 256)
    # Number of leading encoder parameter tensors that are frozen.
    _FROZEN_PARAMS = 70

    def __init__(self, num_classes, dropout_rate=0.5):
        super().__init__()
        # The encoder is created with num_classes=0 and its output is fed to
        # self.head, whose input width is taken from encoder.num_features.
        self.encoder = create_model('tf_efficientnetv2_s', pretrained=True, num_classes=0)

        encoder_params = list(self.encoder.named_parameters())
        for _, parameter in encoder_params[:self._FROZEN_PARAMS]:
            parameter.requires_grad = False

        # Build Linear + LayerNorm + ReLU + Dropout stages; the resulting
        # Sequential indices (and state_dict keys) must stay unchanged so that
        # existing checkpoints keep loading.
        widths = (self.encoder.num_features, *self._HEAD_WIDTHS)
        stages = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Linear(fan_in, fan_out),
                nn.LayerNorm(fan_out),
                nn.ReLU(inplace=True),
                nn.Dropout(dropout_rate),
            ]
        stages.append(nn.Linear(widths[-1], num_classes))

        self.head = nn.Sequential(*stages)

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        encoded = self.encoder(x)
        logits = self.head(encoded)
        return logits

In [13]:
def calculate_macro_f1(preds, targets, num_classes):
    """Macro-averaged F1 over the class indices ``0 .. num_classes - 1``.

    Args:
        preds: predicted class indices (tensor, numpy array or plain sequence).
        targets: true class indices, same length as ``preds``.
        num_classes: number of classes to average over. A class that never
            appears in ``preds`` or ``targets`` contributes an F1 of 0.

    Returns:
        float: unweighted mean of the per-class F1 scores, or 0.0 when
        ``num_classes`` is not positive.

    Raises:
        ValueError: if ``preds`` and ``targets`` differ in length.
    """
    # detach() so tensors that still carry autograd history can be converted.
    if torch.is_tensor(preds):
        preds = preds.detach().cpu().numpy()
    if torch.is_tensor(targets):
        targets = targets.detach().cpu().numpy()

    # Coerce plain sequences: comparing a Python list with an int yields a
    # single False instead of an element-wise mask, which silently scored 0.
    preds = np.asarray(preds).ravel()
    targets = np.asarray(targets).ravel()
    if preds.shape != targets.shape:
        raise ValueError(
            f"preds and targets must have the same length, got {preds.size} and {targets.size}"
        )

    if num_classes <= 0:
        return 0.0

    class_f1_scores = []
    for class_idx in range(num_classes):
        predicted_here = preds == class_idx
        actually_here = targets == class_idx

        tp = np.sum(predicted_here & actually_here)    # correctly predicted
        fp = np.sum(predicted_here & ~actually_here)   # predicted but wrong
        fn = np.sum(~predicted_here & actually_here)   # missed

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0

        if precision + recall > 0:
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            f1 = 0.0
        class_f1_scores.append(f1)

    # Macro F1: every class counts equally regardless of its support.
    return float(np.mean(class_f1_scores))

In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import numpy as np
import pandas as pd

class EnsemblePredictor:
    """Weighted soft-voting ensemble over classification models.

    Each model's softmax output is multiplied by its normalised weight and the
    results are summed; the predicted class is the argmax of that mixture.
    """

    def __init__(self, models, weights=None, device='cuda'):
        """
        Args:
            models (list): models to combine, e.g.
                [EfficientNetB6Head, EfficientNetV2SmallHead] instances.
            weights (list, optional): one weight per model; normalised to sum
                to 1. Defaults to equal weights.
            device (str or torch.device): device the models and all input
                batches are moved to.

        Raises:
            ValueError: if ``models`` is empty, the number of weights does not
                match the number of models, or the weights do not sum to a
                positive value.
        """
        if len(models) == 0:
            raise ValueError("EnsemblePredictor needs at least one model")

        self.models = models
        self.device = device

        if weights is None:
            self.weights = [1 / len(models)] * len(models)
        else:
            # zip() in predict_batch would silently drop models otherwise.
            if len(weights) != len(models):
                raise ValueError(
                    f"Expected {len(models)} weights, got {len(weights)}"
                )
            total = sum(weights)
            if total <= 0:
                raise ValueError("Ensemble weights must sum to a positive value")
            self.weights = [w / total for w in weights]

        # Inference only: switch to eval mode and move to the target device.
        for model in self.models:
            model.eval()
            model.to(self.device)

    @torch.no_grad()
    def predict_batch(self, batch):
        """Run the ensemble on one batch.

        Args:
            batch (torch.Tensor): image batch, already on ``self.device``.

        Returns:
            predictions (torch.Tensor): predicted class index per sample.
            ensemble_probas (torch.Tensor): weighted-average class
                probabilities per sample.
        """
        weighted = [
            torch.softmax(model(batch), dim=1) * weight
            for model, weight in zip(self.models, self.weights)
        ]
        ensemble_probas = sum(weighted)
        _, predictions = torch.max(ensemble_probas, 1)
        return predictions, ensemble_probas

    @torch.no_grad()
    def validate(self, valid_loader, criterion):
        """Evaluate the ensemble on a validation loader.

        Args:
            valid_loader (DataLoader): yields dicts with 'image' and 'target'.
            criterion: classification loss that takes logits or
                log-probabilities plus class indices (e.g. nn.CrossEntropyLoss
                or nn.NLLLoss). It receives the log of the ensemble
                probabilities: log_softmax leaves normalised log-probabilities
                unchanged, so both loss types yield the true cross-entropy,
                whereas raw probabilities would be soft-maxed a second time.

        Returns:
            avg_loss (float): mean of the per-batch losses.
            macro_f1 (float): macro F1 computed once over the whole
                validation set (averaging per-batch macro F1 is biased,
                because classes absent from a small batch score 0).

        Raises:
            ValueError: if ``valid_loader`` yields no batches.
        """
        running_loss = 0.0
        steps = 0
        num_classes = 0
        collected_preds = []
        collected_targets = []

        for data in tqdm(valid_loader, desc='Validating Ensemble'):
            images = data['image'].to(self.device, non_blocking=True)
            targets = data['target'].to(self.device, non_blocking=True)

            preds, probas = self.predict_batch(images)

            # Clamp before the log so a zero probability cannot produce -inf.
            loss = criterion(torch.log(probas.clamp_min(1e-12)), targets)

            running_loss += loss.item()
            steps += 1
            num_classes = probas.size(1)
            collected_preds.append(preds.cpu())
            collected_targets.append(targets.cpu())

        # Release cached blocks once, not on every batch (which only slows
        # the loop down by forcing re-allocation).
        torch.cuda.empty_cache()

        if steps == 0:
            raise ValueError("valid_loader produced no batches")

        macro_f1 = calculate_macro_f1(
            torch.cat(collected_preds), torch.cat(collected_targets), num_classes
        )
        return running_loss / steps, macro_f1

    @torch.no_grad()
    def generate_submission(self, test_loader, label_encoder):
        """Predict the test set and build the submission frame.

        Args:
            test_loader (DataLoader): yields dicts with 'image' and 'filename'.
            label_encoder: fitted encoder used to map indices back to names.

        Returns:
            submission (pd.DataFrame): columns 'filename', 'city' and one
                '<class>_probability' column per encoder class.
        """
        predictions = []
        filenames = []
        probability_chunks = []

        for data in tqdm(test_loader, desc='Generating Predictions'):
            images = data['image'].to(self.device, non_blocking=True)

            preds, probas = self.predict_batch(images)

            predictions.extend(label_encoder.inverse_transform(preds.cpu().numpy()))
            filenames.extend(data['filename'])
            probability_chunks.append(probas.cpu().numpy())

        torch.cuda.empty_cache()

        submission = pd.DataFrame({
            'filename': filenames,
            'city': predictions
        })

        classes = label_encoder.classes_
        if probability_chunks:
            probabilities = np.concatenate(probability_chunks, axis=0)
        else:
            # Keep a 2-D shape so the column slicing below works on an empty loader.
            probabilities = np.empty((0, len(classes)))

        for i, city in enumerate(classes):
            submission[f'{city}_probability'] = probabilities[:, i]

        return submission

    @torch.no_grad()
    def get_model_predictions(self, batch):
        """Return each model's individual output (for debugging).

        Args:
            batch (torch.Tensor): image batch, already on ``self.device``.

        Returns:
            model_predictions (list): predicted class indices, one tensor per model.
            model_probas (list): softmax probabilities, one tensor per model.
        """
        model_predictions = []
        model_probas = []

        for model in self.models:
            proba = torch.softmax(model(batch), dim=1)
            _, preds = torch.max(proba, 1)

            model_predictions.append(preds)
            model_probas.append(proba)

        return model_predictions, model_probas

def load_models(efficientnetb6_path, efficientnetv2_path, num_classes, map_location='cpu'):
    """Build both ensemble members and restore their trained weights.

    Args:
        efficientnetb6_path: checkpoint file for EfficientNetB6Head.
        efficientnetv2_path: checkpoint file for EfficientNetV2SmallHead.
        num_classes: number of output classes of both heads.
        map_location: forwarded to ``torch.load``. Defaults to 'cpu' so the
            checkpoints also load on machines without CUDA and no temporary
            copy of the weights is placed on the GPU; the freshly built models
            live on the CPU until the caller moves them.

    Returns:
        list: [EfficientNetB6Head, EfficientNetV2SmallHead] with weights loaded.
    """
    def _restore(model, checkpoint_path):
        # SECURITY: torch.load unpickles the file, which can execute arbitrary
        # code. Only load checkpoints from a trusted source, or switch to
        # weights_only=True once the checkpoint is known to hold tensors only.
        checkpoint = torch.load(checkpoint_path, map_location=map_location)
        model.load_state_dict(checkpoint['model_state_dict'])
        return model

    return [
        _restore(EfficientNetB6Head(num_classes=num_classes), efficientnetb6_path),
        _restore(EfficientNetV2SmallHead(num_classes=num_classes), efficientnetv2_path),
    ]

@torch.no_grad()
def validate_ensemble(valid_loader, ensemble_predictor, criterion):
    """Evaluate an ensemble predictor on the validation set.

    Function-style wrapper around ``EnsemblePredictor.validate``. The loop that
    used to live here was a line-for-line copy of that method; delegating keeps
    a single implementation so the two entry points cannot drift apart.

    Args:
        valid_loader (DataLoader): validation data loader.
        ensemble_predictor: object exposing ``validate(valid_loader, criterion)``.
        criterion: loss function forwarded unchanged.

    Returns:
        Whatever ``ensemble_predictor.validate`` returns: a
        (validation loss, validation F1) pair.
    """
    return ensemble_predictor.validate(valid_loader, criterion)

def generate_ensemble_submission(test_loader, ensemble_predictor, label_encoder):
    """Predict the test set and return a minimal submission frame.

    Args:
        test_loader (DataLoader): yields dicts with 'image' and 'filename'.
        ensemble_predictor: object exposing ``predict_batch(images)`` that
            returns ``(predictions, probabilities)``.
        label_encoder: fitted encoder used to map class indices back to names.

    Returns:
        pd.DataFrame: columns 'filename' and 'city', one row per test image.
    """
    # Follow the predictor's device rather than assuming CUDA; predictors
    # without a `device` attribute keep the previous CUDA behaviour.
    device = getattr(ensemble_predictor, 'device', 'cuda')

    predictions = []
    filenames = []

    for data in tqdm(test_loader, desc='Generating Predictions'):
        images = data['image'].to(device, non_blocking=True)

        # Only the class indices are needed; the probabilities are discarded.
        preds, _ = ensemble_predictor.predict_batch(images)

        predictions.extend(label_encoder.inverse_transform(preds.cpu().numpy()))
        filenames.extend(data['filename'])

    # Release cached GPU memory once, after the loop, not on every batch.
    torch.cuda.empty_cache()

    return pd.DataFrame({
        'filename': filenames,
        'city': predictions
    })

In [20]:
def prepare_data(train_df):
    """Assign a stratified cross-validation fold to every row.

    Adds (or overwrites) an integer ``fold`` column in ``train_df`` in place,
    with values ``0 .. N_FOLDS - 1``, stratified on the ``city`` column.

    Args:
        train_df: frame with a ``city`` column.

    Returns:
        The same frame, for call chaining.
    """
    skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=42)

    # split() yields positional row numbers. Collect them in a positional
    # array instead of writing through label-based .loc, which only hits the
    # right rows when the index happens to be 0..n-1.
    fold_ids = np.full(len(train_df), -1, dtype=np.int64)
    for fold, (_, val_idx) in enumerate(skf.split(train_df, train_df['city'])):
        fold_ids[val_idx] = fold

    train_df['fold'] = fold_ids
    return train_df

In [23]:
# Inference entry point: compute normalisation statistics, build the data
# loaders, restore the two trained models, ensemble them and write the
# submission CSVs.
if __name__ == '__main__':
    # Root of the competition dataset on Kaggle.
    KAGGLE_INPUT = '/kaggle/input/datathon-ai-qualification-round'
    
    # Load the train / test tables
    train = pd.read_csv(f'{KAGGLE_INPUT}/train_data.csv')
    test = pd.read_csv(f'{KAGGLE_INPUT}/test.csv')
    
    # Image directories the filenames in the tables are relative to
    train_dir = f'{KAGGLE_INPUT}/train/train'
    test_dir = f'{KAGGLE_INPUT}/test/test'
    
    # Print dataset info
    print("Dataset Information:")
    print(f"Training samples: {len(train)}")
    print(f"Test samples: {len(test)}")
    print("\nSample training data:")
    print(train.head())
    print("\nSample test data:")
    print(test.head())
    
    # Fail early if either image directory is missing
    for path in [train_dir, test_dir]:
        if not os.path.exists(path):
            raise ValueError(f"Path does not exist: {path}")
    
    # Add the `fold` column (must happen before the fold filter below)
    train = prepare_data(train)
    
    # Calculate dataset statistics ONLY on the training part of the split,
    # i.e. every row whose fold differs from `training_fold` (the held-out fold).
    training_fold = 0
    train_fold_data = train[train['fold'] != training_fold].reset_index(drop=True)
    
    print(f"Calculating statistics using {len(train_fold_data)} training samples...")
    train_mean, train_std = calculate_dataset_stats(train_fold_data, train_dir)
    
    # Backend settings: cuDNN autotuning off, TF32 allowed for matmul and cuDNN ops
    torch.backends.cudnn.benchmark = False
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    
    try:
        # Set device (falls back to CPU when CUDA is unavailable)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {device}")
        
        # Build the loaders with fold `training_fold` held out for validation.
        # Only test_loader, label_encoder and num_classes are used below;
        # train_loader and valid_loader are created but not consumed in this cell.
        train_loader, valid_loader, test_loader, label_encoder, num_classes = load_data(
            train, test, train_dir, test_dir, fold=training_fold,
            mean=train_mean.tolist(),
            std=train_std.tolist()
        )
        
        # Checkpoint files of the two trained ensemble members
        EFFICIENTNETB6_PATH = '/kaggle/input/efficientnet/pytorch/default/1/reg_efficientnetb6_imgsize320_simulatedbatchsize32.pth'
        EFFICIENTNETV2_PATH = '/kaggle/input/efficientnet/pytorch/default/1/reg_efficientnetv2_imgsize320_simulatedbatchsize32.pth'
        
        print("Loading pretrained models...")
        # Load models (order: [B6, V2-S]; the weights below follow the same order)
        models = load_models(EFFICIENTNETB6_PATH, EFFICIENTNETV2_PATH, num_classes=num_classes)
        
        # Model weights based on validation F1 scores; EnsemblePredictor
        # normalises them so they sum to 1.
        weights = [0.92928, 0.92190]
        
        print("Creating ensemble predictor...")
        # Create ensemble predictor
        ensemble_predictor = EnsemblePredictor(
            models=models,
            weights=weights,
            device=device
        )
        
        print("Generating predictions...")
        # Generate predictions with probabilities
        submission = ensemble_predictor.generate_submission(test_loader, label_encoder)
        
        # Output locations
        submission_path = '/kaggle/working/submission.csv'
        detailed_submission_path = '/kaggle/working/detailed_submission.csv'
        
        # Save basic submission (filename + predicted city only)
        final_submission = submission[['filename', 'city']]
        final_submission.to_csv(submission_path, index=False)
        
        # Save detailed submission with the per-class probability columns
        submission.to_csv(detailed_submission_path, index=False)
        
        print(f"Submissions saved to: {submission_path} and {detailed_submission_path}")
        
        # Verify submission format. These checks run after the files have been
        # written and only print warnings; they do not stop the run.
        print("\nVerifying submission format...")
        if set(final_submission.columns) != {'filename', 'city'}:
            print("Warning: Submission columns do not match required format!")
        # NOTE(review): the valid city names are hard-coded here rather than
        # taken from the data - confirm they match the competition's label set.
        if not all(final_submission['city'].isin(['Istanbul', 'Ankara', 'Izmir'])):
            print("Warning: Submission contains invalid city names!")
        
    except Exception as e:
        # Log the message, then re-raise so the cell still fails visibly.
        print(f"An error occurred: {str(e)}")
        raise
    
    finally:
        # Clean up GPU cache and Python garbage whether or not the run succeeded
        torch.cuda.empty_cache()
        gc.collect()

Dataset Information:
Training samples: 7000
Test samples: 2000

Sample training data:
          filename      city
0  image_10000.jpg  Istanbul
1  image_10001.jpg  Istanbul
2  image_10002.jpg    Ankara
3  image_10003.jpg     Izmir
4  image_10004.jpg    Ankara

Sample test data:
          filename  city
0  image_17000.jpg   NaN
1  image_17001.jpg   NaN
2  image_17002.jpg   NaN
3  image_17003.jpg   NaN
4  image_17004.jpg   NaN
Calculating statistics using 5600 training samples...
Calculating dataset mean and std...


Calculating dataset statistics: 100%|██████████| 175/175 [00:33<00:00,  5.27it/s]
  model = create_fn(


Dataset mean: tensor([0.5070, 0.5327, 0.5378])
Dataset std: tensor([0.2318, 0.2396, 0.2917])
Using device: cuda
Preparing data loaders...
Train dataset size: 5600
Validation dataset size: 1400
Test dataset size: 2000
Loading pretrained models...


model.safetensors:   0%|          | 0.00/173M [00:00<?, ?B/s]

  efficientnetb6_state = torch.load(efficientnetb6_path, map_location='cuda')


model.safetensors:   0%|          | 0.00/86.5M [00:00<?, ?B/s]

  efficientnetv2_state = torch.load(efficientnetv2_path, map_location='cuda')


Creating ensemble predictor...
Generating predictions...


Generating Predictions: 100%|██████████| 125/125 [00:27<00:00,  4.47it/s]


Submissions saved to: /kaggle/working/submission.csv and /kaggle/working/detailed_submission.csv

Verifying submission format...
