In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session


In [3]:
#pip install -U albumentations


In [4]:
import os
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

train_csv_path = '/kaggle/input/plant-pathology-2020-fgvc7/train.csv'
image_dir = '/kaggle/input/plant-pathology-2020-fgvc7/images/'

train_df = pd.read_csv(train_csv_path)
train_df[train_df.columns[1:]] = train_df[train_df.columns[1:]].astype(float)
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

class PlantDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None):
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_dir, self.dataframe.iloc[idx, 0] + '.jpg')  # Например, Train_1524
        image = cv2.imread(img_name)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        labels = self.dataframe.iloc[idx, 1:].values.astype(float)
        labels = torch.tensor(labels, dtype=torch.float32)

        return image, labels
    
class TestDataset(Dataset):
    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.images = sorted([img for img in os.listdir(image_dir) if img.startswith('Test_') and img.endswith('.jpg')],
                             key=lambda x: int(x.split('_')[1].split('.')[0])) 

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        image = Image.open(img_path).convert("RGB")
        image = np.array(image)
        
        if self.transform:
            image = self.transform(image=image)['image']
        
        return self.images[idx], image

train_transform = A.Compose([
    A.Resize(224, 224),  
    A.HorizontalFlip(p=0.5), 
    A.RandomBrightnessContrast(p=0.2),  
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=15, p=0.5),  
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  
    ToTensorV2()  
])

val_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

test_dataset = TestDataset(image_dir, transform=val_transform)
train_dataset = PlantDataset(train_df, image_dir, transform=train_transform)
val_dataset = PlantDataset(val_df, image_dir, transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=256, num_workers=0, pin_memory=True, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=256, num_workers=0, pin_memory=True, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(val_dataset)}")


Train dataset size: 1310
Validation dataset size: 328


  original_init(self, **validated_kwargs)


In [5]:
class EarlyStopping():
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = float('inf')
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score  self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'Раннее прекращение: {self.counter} из {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        if self.verbose:
            self.trace_func(f'Валидационная ошибка уменьшилась ({self.val_loss_min:.6f} --> {val_loss:.6f}).')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss


SyntaxError: invalid syntax (2942084024.py, line 20)

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torchvision.models import ResNet34_Weights


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model = models.resnet34(weights=ResNet34_Weights.IMAGENET1K_V1)

num_classes = 4
model.fc = nn.Linear(model.fc.in_features, num_classes)

model = model.to(device)
criterion = nn.BCEWithLogitsLoss() 
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    early_stopping = EarlyStopping(patience=3, verbose=True)

    for epoch in range(num_epochs):
        model.train()  
        running_loss = 0.0
        
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        
        epoch_loss = running_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

        val_loss = validate_model(model, val_loader, criterion)

        early_stopping(val_loss, model)

        if early_stopping.early_stop:
            print("Stopping training early")
            break

def validate_model(model, val_loader, criterion):
    model.eval()  
    running_loss = 0.0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

    val_loss = running_loss / len(val_loader)
    print(f'Validation Loss: {val_loss:.4f}')
    return val_loss

train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=15)


Using device: cpu


Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 115MB/s]


NameError: name 'EarlyStopping' is not defined

In [7]:
def predict(model, test_loader, device):
    model.eval()
    results = []

    with torch.no_grad():
        for image_ids, images in test_loader:
            images = images.to(device)  

            outputs = model(images)
            probabilities = torch.sigmoid(outputs)  
            
            for img_id, probs in zip(image_ids, probabilities):
                row = [img_id.replace('.jpg', '')] + [0] * 4  
                class_idx = torch.argmax(probs).item()  
                row[class_idx + 1] = 1 
                results.append(row)

    return results

predictions = predict(model, test_loader, device)

columns = ['image_id', 'healthy', 'multiple_diseases', 'rust', 'scab']
df = pd.DataFrame(predictions, columns=columns)
df.to_csv('predictions.csv', index=False)

print("Predictions saved to predictions.csv")


Predictions saved to predictions.csv


In [8]:
df = pd.read_csv('predictions.csv')
df.head(10)


Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Test_0,0,0,1,0
1,Test_1,0,0,1,0
2,Test_2,0,0,1,0
3,Test_3,0,0,1,0
4,Test_4,0,0,1,0
5,Test_5,0,0,1,0
6,Test_6,0,0,1,0
7,Test_7,0,0,1,0
8,Test_8,0,0,1,0
9,Test_9,0,0,1,0
