In [5]:
%%capture
!pip install efficientnet_pytorch
!pip install torch_optimizer

In [6]:
import io
import random
import os
import math
import timm
from PIL import Image
from tqdm import tqdm
import gc
import pandas as pd
import multiprocessing
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import transforms
from torch.utils.data import DataLoader
from efficientnet_pytorch import EfficientNet
from torchvision.models import efficientnet_v2_s, efficientnet_v2_m, efficientnet_v2_l

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2


from torch.cuda.amp import GradScaler

In [7]:
# Constants
BATCH_SIZE = 16
GRADIENT_ACCUMULATION_STEPS = 2
NUM_WORKERS = 2
IMAGE_SIZE = 320 
PIN_MEMORY = True 
PATIENCE = 5
N_FOLDS = 5

In [8]:
def calculate_dataset_stats(dataframe, image_dir):
    """Calculate mean and std of the dataset"""
    print("Calculating dataset mean and std...")
    
    # Basic transforms just for stats calculation
    basic_transforms = transforms.Compose([
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor()
    ])
    
    class StatsDataset(torch.utils.data.Dataset):
        def __init__(self, df, img_dir, transform):
            self.df = df
            self.img_dir = img_dir
            self.transform = transform
        
        def __len__(self):
            return len(self.df)
        
        def __getitem__(self, idx):
            img_path = os.path.join(self.img_dir, self.df.iloc[idx].filename)
            image = Image.open(img_path).convert('RGB')
            return self.transform(image)
    
    # Create dataset and loader for stats calculation
    stats_dataset = StatsDataset(dataframe, image_dir, basic_transforms)
    stats_loader = DataLoader(
        stats_dataset,
        batch_size=32,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY
    )
    
    means = []
    stds = []
    
    # Calculate mean and std
    for batch in tqdm(stats_loader, desc="Calculating dataset statistics"):
        means.append(batch.mean((0,2,3)))
        stds.append(batch.std((0,2,3)))
    
    dataset_mean = torch.stack(means).mean(0)
    dataset_std = torch.stack(stds).mean(0)
    
    print(f"Dataset mean: {dataset_mean}")
    print(f"Dataset std: {dataset_std}")
    
    return dataset_mean, dataset_std

class ImageDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe: pd.DataFrame, image_dir: str, mode: str, mean=None, std=None):
        self.df = dataframe
        self.mode = mode
        self.image_dir = image_dir
        
        # Use calculated stats or ImageNet stats as fallback
        self.mean = mean if mean is not None else [0.485, 0.456, 0.406]
        self.std = std if std is not None else [0.229, 0.224, 0.225]
        
        if self.mode == 'train':
            self.transforms = transforms.Compose([
                transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomVerticalFlip(p=0.3),
                transforms.RandomRotation(15),
                transforms.ColorJitter(
                    brightness=0.2, 
                    contrast=0.2, 
                    saturation=0.2, 
                    hue=0.1
                ),
                transforms.RandomAffine(
                    degrees=10, 
                    translate=(0.1, 0.1), 
                    scale=(0.9, 1.1)
                ),
                transforms.RandomGrayscale(p=0.1),
                transforms.ToTensor(),
                transforms.Normalize(mean=self.mean, std=self.std)
            ])
        else:
            self.transforms = transforms.Compose([
                transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
                transforms.ToTensor(),
                transforms.Normalize(mean=self.mean, std=self.std)
            ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index: int):
        try:
            row = self.df.iloc[index]
            image_path = os.path.join(self.image_dir, row.filename)
            
            try:
                image = Image.open(image_path).convert('RGB')
                image = self.transforms(image)
            except Exception as e:
                print(f"Error loading image {image_path}: {str(e)}")
                raise e

            if self.mode == 'test':
                return {
                    'image': image,
                    'filename': row.filename
                }
            else:
                return {
                    'image': image,
                    'target': row.city_id,
                    'filename': row.filename
                }
        except Exception as e:
            print(f"Error in __getitem__ at index {index}: {str(e)}")
            raise e


In [9]:
import timm
from timm import create_model

class EfficientNetB6Head(nn.Module):
    def __init__(self, num_classes, dropout_rate=0.5):
        super().__init__()
        self.encoder = create_model(
            'tf_efficientnet_b6_ns',
            pretrained=True,
            num_classes=0
        )
        
        # Freeze some early layers
        for name, param in list(self.encoder.named_parameters())[:100]:
            param.requires_grad = False
            
        n_features = self.encoder.num_features
        
        # More gradual reduction in dimensions
        # Added LayerNorm for better regularization
        self.head = nn.Sequential(
            nn.Linear(n_features, 1536),
            nn.LayerNorm(1536),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            
            nn.Linear(1536, 768),
            nn.LayerNorm(768),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            
            nn.Linear(768, 384),
            nn.LayerNorm(384),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            
            nn.Linear(384, num_classes)
        )
        
    def forward(self, x):
        features = self.encoder(x)
        return self.head(features)

In [10]:
def prepare_data(train_df):
    skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=42)
    train_df['fold'] = -1
    
    for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df['city'])):
        train_df.loc[val_idx, 'fold'] = fold
    
    return train_df

def load_data(train_df, test_df, train_dir, test_dir, fold=0, mean=None, std=None):
    print("Preparing data loaders...")
    
    label_encoder = LabelEncoder()
    train_df['city_id'] = label_encoder.fit_transform(train_df['city'])
    num_classes = len(label_encoder.classes_)
    
    train_data = train_df[train_df['fold'] != fold].reset_index(drop=True)
    valid_data = train_df[train_df['fold'] == fold].reset_index(drop=True)
    
    train_dataset = ImageDataset(train_data, train_dir, mode='train', mean=mean, std=std)
    valid_dataset = ImageDataset(valid_data, train_dir, mode='valid', mean=mean, std=std)
    test_dataset = ImageDataset(test_df, test_dir, mode='test', mean=mean, std=std)

    print(f"Train dataset size: {len(train_dataset)}")
    print(f"Validation dataset size: {len(valid_dataset)}")
    print(f"Test dataset size: {len(test_dataset)}")
    
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
        drop_last=True
    )
    
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY
    )
    
    test_loader = DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY
    )
    
    return train_loader, valid_loader, test_loader, label_encoder, num_classes

def calculate_macro_f1(preds, targets, num_classes):

    # Tensor'ları numpy array'e çevir
    if torch.is_tensor(preds):
        preds = preds.cpu().numpy()
    if torch.is_tensor(targets):
        targets = targets.cpu().numpy()
    
    # Her şehir için F1 skorunu hesapla
    city_f1_scores = []
    
    for city_idx in range(num_classes):
        # True Positives: Doğru tahmin edilen şehir sayısı
        tp = np.sum((preds == city_idx) & (targets == city_idx))
        
        # False Positives: Yanlış şehir olarak tahmin edilenler
        fp = np.sum((preds == city_idx) & (targets != city_idx))
        
        # False Negatives: Kaçırılan şehir tahminleri
        fn = np.sum((preds != city_idx) & (targets == city_idx))
        
        # Precision hesapla
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        
        # Recall hesapla
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        
        # F1 skoru hesapla
        if precision + recall > 0:
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            f1 = 0.0
        
        city_f1_scores.append(f1)
    
    # Macro F1: Tüm şehirlerin F1 skorlarının ortalaması
    macro_f1 = np.mean(city_f1_scores)
    
    return float(macro_f1)

In [11]:
def train_step(train_loader, model, criterion, optimizer, epoch, scaler):
    model.train()
    running_loss = 0.0
    running_f1 = 0.0
    steps = 0
    
    optimizer.zero_grad(set_to_none=True)
    
    pbar = tqdm(enumerate(train_loader), total=len(train_loader))
    for i, data in pbar:
        images = data['image'].cuda(non_blocking=True)
        targets = data['target'].cuda(non_blocking=True)
        
        with torch.amp.autocast(device_type='cuda'):
            outputs = model(images)
            loss = criterion(outputs, targets)
            loss = loss / GRADIENT_ACCUMULATION_STEPS
        
        # Mixed precision backward pass
        scaler.scale(loss).backward()
        
        # Gradient accumulation
        if (i + 1) % GRADIENT_ACCUMULATION_STEPS == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad(set_to_none=True)
        
        # Calculate metrics
        with torch.no_grad():
            _, preds = torch.max(outputs, 1)
            f1 = calculate_macro_f1(preds, targets, outputs.size(1))
        
        # Update metrics
        running_loss += loss.item() * GRADIENT_ACCUMULATION_STEPS
        running_f1 += f1
        steps += 1
        
        # Update progress bar
        pbar.set_description(
            f'Epoch {epoch} - Loss: {running_loss/steps:.4f}, F1: {running_f1/steps:.4f}'
        )
        
        # Clear memory
        del images, outputs, loss
        torch.cuda.empty_cache()
    
    return running_f1 / steps

def train_model(train_loader, valid_loader, model, criterion, optimizer, scheduler, scaler):
    """Training loop with early stopping"""
    patience = PATIENCE
    counter = 0
    best_valid_f1 = 0.0
    epoch = 0
    
    while True:
        epoch += 1
        torch.cuda.empty_cache()
        gc.collect()
        
        # Train
        train_f1 = train_step(train_loader, model, criterion, optimizer, epoch, scaler)
        
        # Validate
        valid_loss, valid_f1 = validate(valid_loader, model, criterion)
        
        # Scheduler step
        scheduler.step()
        
        print(f"Epoch {epoch} - Train F1: {train_f1:.4f}, Valid F1: {valid_f1:.4f}")
        
        if valid_f1 > best_valid_f1:
            best_valid_f1 = valid_f1
            counter = 0
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),  # scheduler'ı da kaydedelim
                'epoch': epoch,
                'best_f1': best_valid_f1,
            }, 'best_model.pth')
            print(f"Saved best model with F1: {best_valid_f1:.4f}")
        else:
            counter += 1
            if counter >= patience:
                print(f"Early stopping at epoch {epoch}")
                break
    
    return best_valid_f1  

@torch.no_grad()
def validate(valid_loader, model, criterion):
    model.eval()
    running_loss = 0.0
    running_f1 = 0.0
    steps = 0
    
    for data in tqdm(valid_loader, desc='Validating'):
        images = data['image'].cuda(non_blocking=True)
        targets = data['target'].cuda(non_blocking=True)
        
        # Updated autocast syntax
        with torch.amp.autocast(device_type='cuda'):
            outputs = model(images)
            loss = criterion(outputs, targets)
        
        _, preds = torch.max(outputs, 1)
        f1 = calculate_macro_f1(preds, targets, outputs.size(1))
        
        running_loss += loss.item()
        running_f1 += f1
        steps += 1
        
        del images, outputs, loss
        torch.cuda.empty_cache()
    
    return running_loss / steps, running_f1 / steps

@torch.no_grad()
def predict(test_loader, model, label_encoder):
    model.eval()
    predictions = []
    filenames = []
    
    for data in tqdm(test_loader):
        images = data['image'].cuda(non_blocking=True)
        
        with torch.cuda.amp.autocast():
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
        
        predictions.extend(label_encoder.inverse_transform(preds.cpu().numpy()))
        filenames.extend(data['filename'])
        
        del images, outputs, preds
        torch.cuda.empty_cache()
    
    return filenames, predictions


In [12]:
def generate_submission(test_loader, model, label_encoder):
    filenames, predictions = predict(test_loader, model, label_encoder)
    
    submission = pd.DataFrame({
        'filename': filenames,
        'city': predictions
    })
    
    submission.to_csv('submission.csv', index=False)
    return submission

In [13]:
if __name__ == '__main__':
    # Kaggle paths
    KAGGLE_INPUT = '/kaggle/input/datathon-ai-qualification-round'
    
    # Load data
    train = pd.read_csv(f'{KAGGLE_INPUT}/train_data.csv')
    test = pd.read_csv(f'{KAGGLE_INPUT}/test.csv')
    
    # Set correct image directories
    train_dir = f'{KAGGLE_INPUT}/train/train'
    test_dir = f'{KAGGLE_INPUT}/test/test'
    
    # Print dataset info
    print("Dataset Information:")
    print(f"Training samples: {len(train)}")
    print(f"Test samples: {len(test)}")
    print("\nSample training data:")
    print(train.head())
    print("\nSample test data:")
    print(test.head())
    
    # Verify paths exist
    for path in [train_dir, test_dir]:
        if not os.path.exists(path):
            raise ValueError(f"Path does not exist: {path}")
    
    # First prepare folds - MOVED BEFORE stats calculation
    train = prepare_data(train)
    
    # Calculate dataset statistics ONLY on training fold
    # Get the data for the training fold (excluding validation data)
    training_fold = 0  # Assuming we're using fold 0 for validation
    train_fold_data = train[train['fold'] != training_fold].reset_index(drop=True)
    
    print(f"Calculating statistics using {len(train_fold_data)} training samples...")
    train_mean, train_std = calculate_dataset_stats(train_fold_data, train_dir)
    
    # Enable memory optimizations
    torch.backends.cudnn.benchmark = False
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    
    try:
        # Set device
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {device}")
        
        # Initialize mixed precision training
        scaler = torch.amp.GradScaler(device='cuda')
        
        # Train on fold 0
        train_loader, valid_loader, test_loader, label_encoder, num_classes = load_data(
            train, test, train_dir, test_dir, fold=training_fold,
            mean=train_mean.tolist(),
            std=train_std.tolist()
        )
        
        # Initialize model
        model = EfficientNetB6Head(num_classes=num_classes)
        model = model.to(device)
        
        # Optimizer and criterion
        criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
        optimizer = optim.AdamW(
        model.parameters(),
        lr=5e-4,  # Daha düşük learning rate
        weight_decay=0.05,  # Daha yüksek weight decay
        betas=(0.9, 0.999)
    )
    
        # Scheduler
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        T_max=30,  # maximum epoch sayısı
        eta_min=1e-6  # minimum learning rate
    )
        
        print("Starting training...")
        # Train model with early stopping
        best_valid_f1 = train_model(
            train_loader, valid_loader, model, 
            criterion, optimizer, scheduler, scaler 
        )
        
        print("Loading best model for submission...")
        # Load best model and generate submission
        checkpoint = torch.load('best_model.pth')
        model.load_state_dict(checkpoint['model_state_dict'])
        
        print("Generating submission file...")
        submission = generate_submission(test_loader, model, label_encoder)
        
        # Save submission
        submission_path = '/kaggle/working/submission.csv'
        submission.to_csv(submission_path, index=False)
        
        print(f"Training completed. Best validation F1: {best_valid_f1:.4f}")
        print(f"Submission saved to: {submission_path}")
        
        # Verify submission format
        print("\nVerifying submission format...")
        if set(submission.columns) != {'filename', 'city'}:
            print("Warning: Submission columns do not match required format!")
        if not all(submission['city'].isin(['Istanbul', 'Ankara', 'Izmir'])):
            print("Warning: Submission contains invalid city names!")
        
    except Exception as e:
        print(f"An error occurred during training: {str(e)}")
        raise
    
    finally:
        # Clean up
        torch.cuda.empty_cache()
        gc.collect()

Dataset Information:
Training samples: 7000
Test samples: 2000

Sample training data:
          filename      city
0  image_10000.jpg  Istanbul
1  image_10001.jpg  Istanbul
2  image_10002.jpg    Ankara
3  image_10003.jpg     Izmir
4  image_10004.jpg    Ankara

Sample test data:
          filename  city
0  image_17000.jpg   NaN
1  image_17001.jpg   NaN
2  image_17002.jpg   NaN
3  image_17003.jpg   NaN
4  image_17004.jpg   NaN
Calculating statistics using 5600 training samples...
Calculating dataset mean and std...


Calculating dataset statistics: 100%|██████████| 175/175 [00:45<00:00,  3.85it/s]
  model = create_fn(


Dataset mean: tensor([0.5070, 0.5327, 0.5378])
Dataset std: tensor([0.2318, 0.2396, 0.2917])
Using device: cuda
Preparing data loaders...
Train dataset size: 5600
Validation dataset size: 1400
Test dataset size: 2000


model.safetensors:   0%|          | 0.00/173M [00:00<?, ?B/s]

Starting training...


Epoch 1 - Loss: 0.9319, F1: 0.5832: 100%|██████████| 350/350 [03:11<00:00,  1.83it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.86it/s]


Epoch 1 - Train F1: 0.5832, Valid F1: 0.7315
Saved best model with F1: 0.7315


Epoch 2 - Loss: 0.7712, F1: 0.7154: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.96it/s]


Epoch 2 - Train F1: 0.7154, Valid F1: 0.7552
Saved best model with F1: 0.7552


Epoch 3 - Loss: 0.6881, F1: 0.7727: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.95it/s]


Epoch 3 - Train F1: 0.7727, Valid F1: 0.8044
Saved best model with F1: 0.8044


Epoch 4 - Loss: 0.6474, F1: 0.7949: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.95it/s]


Epoch 4 - Train F1: 0.7949, Valid F1: 0.8142
Saved best model with F1: 0.8142


Epoch 5 - Loss: 0.6181, F1: 0.8128: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.95it/s]


Epoch 5 - Train F1: 0.8128, Valid F1: 0.8457
Saved best model with F1: 0.8457


Epoch 6 - Loss: 0.5788, F1: 0.8317: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.94it/s]


Epoch 6 - Train F1: 0.8317, Valid F1: 0.8447


Epoch 7 - Loss: 0.5375, F1: 0.8617: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.96it/s]


Epoch 7 - Train F1: 0.8617, Valid F1: 0.8428


Epoch 8 - Loss: 0.5107, F1: 0.8753: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.96it/s]


Epoch 8 - Train F1: 0.8753, Valid F1: 0.8692
Saved best model with F1: 0.8692


Epoch 9 - Loss: 0.4932, F1: 0.8843: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.94it/s]


Epoch 9 - Train F1: 0.8843, Valid F1: 0.8762
Saved best model with F1: 0.8762


Epoch 10 - Loss: 0.4689, F1: 0.9005: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.93it/s]


Epoch 10 - Train F1: 0.9005, Valid F1: 0.8820
Saved best model with F1: 0.8820


Epoch 11 - Loss: 0.4633, F1: 0.9009: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.93it/s]


Epoch 11 - Train F1: 0.9009, Valid F1: 0.8681


Epoch 12 - Loss: 0.4410, F1: 0.9146: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.95it/s]


Epoch 12 - Train F1: 0.9146, Valid F1: 0.8662


Epoch 13 - Loss: 0.4206, F1: 0.9284: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.94it/s]


Epoch 13 - Train F1: 0.9284, Valid F1: 0.8881
Saved best model with F1: 0.8881


Epoch 14 - Loss: 0.3999, F1: 0.9380: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.94it/s]


Epoch 14 - Train F1: 0.9380, Valid F1: 0.8948
Saved best model with F1: 0.8948


Epoch 15 - Loss: 0.3967, F1: 0.9407: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.93it/s]


Epoch 15 - Train F1: 0.9407, Valid F1: 0.8809


Epoch 16 - Loss: 0.3678, F1: 0.9579: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.94it/s]


Epoch 16 - Train F1: 0.9579, Valid F1: 0.8992
Saved best model with F1: 0.8992


Epoch 17 - Loss: 0.3572, F1: 0.9622: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.94it/s]


Epoch 17 - Train F1: 0.9622, Valid F1: 0.8909


Epoch 18 - Loss: 0.3504, F1: 0.9682: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.93it/s]


Epoch 18 - Train F1: 0.9682, Valid F1: 0.8757


Epoch 19 - Loss: 0.3391, F1: 0.9733: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.94it/s]


Epoch 19 - Train F1: 0.9733, Valid F1: 0.8970


Epoch 20 - Loss: 0.3375, F1: 0.9740: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.94it/s]


Epoch 20 - Train F1: 0.9740, Valid F1: 0.8986


Epoch 21 - Loss: 0.3267, F1: 0.9801: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.93it/s]


Epoch 21 - Train F1: 0.9801, Valid F1: 0.9022
Saved best model with F1: 0.9022


Epoch 22 - Loss: 0.3250, F1: 0.9809: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.94it/s]


Epoch 22 - Train F1: 0.9809, Valid F1: 0.9066
Saved best model with F1: 0.9066


Epoch 23 - Loss: 0.3268, F1: 0.9801: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.93it/s]


Epoch 23 - Train F1: 0.9801, Valid F1: 0.9014


Epoch 24 - Loss: 0.3160, F1: 0.9849: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.93it/s]


Epoch 24 - Train F1: 0.9849, Valid F1: 0.9053


Epoch 25 - Loss: 0.3114, F1: 0.9894: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.90it/s]


Epoch 25 - Train F1: 0.9894, Valid F1: 0.9089
Saved best model with F1: 0.9089


Epoch 26 - Loss: 0.3116, F1: 0.9888: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.93it/s]


Epoch 26 - Train F1: 0.9888, Valid F1: 0.9045


Epoch 27 - Loss: 0.3026, F1: 0.9953: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.94it/s]


Epoch 27 - Train F1: 0.9953, Valid F1: 0.9131
Saved best model with F1: 0.9131


Epoch 28 - Loss: 0.3061, F1: 0.9917: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.94it/s]


Epoch 28 - Train F1: 0.9917, Valid F1: 0.9192
Saved best model with F1: 0.9192


Epoch 29 - Loss: 0.3053, F1: 0.9934: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.92it/s]


Epoch 29 - Train F1: 0.9934, Valid F1: 0.9134


Epoch 30 - Loss: 0.3048, F1: 0.9949: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.93it/s]


Epoch 30 - Train F1: 0.9949, Valid F1: 0.9158


Epoch 31 - Loss: 0.3037, F1: 0.9937: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.92it/s]


Epoch 31 - Train F1: 0.9937, Valid F1: 0.9189


Epoch 32 - Loss: 0.3079, F1: 0.9900: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.94it/s]


Epoch 32 - Train F1: 0.9900, Valid F1: 0.9147


Epoch 33 - Loss: 0.3036, F1: 0.9938: 100%|██████████| 350/350 [03:10<00:00,  1.84it/s]
Validating: 100%|██████████| 88/88 [00:12<00:00,  6.95it/s]
  checkpoint = torch.load('best_model.pth')


Epoch 33 - Train F1: 0.9938, Valid F1: 0.9191
Early stopping at epoch 33
Loading best model for submission...
Generating submission file...


  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
  with torch.c

Training completed. Best validation F1: 0.9192
Submission saved to: /kaggle/working/submission.csv

Verifying submission format...
