<a href="https://www.kaggle.com/code/ukaszniedwiadek/food101-exodia?scriptVersionId=178897214" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

<a href="https://www.kaggle.com/code/ukaszniedwiadek/food101-exodia?scriptVersionId=177471599" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory



import torch
import os
import csv
import cv2
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import timm
from tqdm.notebook import tqdm
from timeit import default_timer as timer
import torch.nn as nn
import time


def load_img(path):
    """Read an image file from disk and return it with RGB channel order.

    Args:
        path: Filesystem path of the image to read.

    Returns:
        The decoded image array with its last axis (the colour channels)
        reversed from OpenCV's BGR order to RGB.

    Raises:
        FileNotFoundError: If OpenCV cannot read or decode the file.
    """
    img_bgr = cv2.imread(path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque TypeError on the slice below.
    if img_bgr is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    # Reverse the channel axis (BGR -> RGB). This is a view, not a copy.
    img_rgb = img_bgr[:, :, ::-1]
    return img_rgb


class CustomDataset(Dataset):
    """Image dataset driven by a DataFrame of image ids and optional labels.

    Each sample is read from ``{data_root}/{image_id}.jpg``.
    """

    def __init__(self, df, data_root, transforms=None, give_label=True):
        """Performed only once when the Dataset object is instantiated.

        Args:
            df: DataFrame with an ``image_id`` column and, when labels are
                requested, a ``label`` column whose values can be cast to int.
                The caller's frame is not modified; a copy is stored.
            data_root: Directory that contains the ``.jpg`` files.
            transforms: Optional callable invoked as ``transforms(image=img)``
                whose result is indexed with ``'image'``.
            give_label: Whether samples carry a target. Should be False for
                test data.
        """
        super().__init__()
        self.df = df.reset_index(drop=True).copy()
        self.data_root = data_root
        self.transforms = transforms
        # Normalise the flag once so that every later check agrees on it
        # (a truthy non-bool value used to skip label setup here and then
        # fail inside __getitem__).
        self.give_label = bool(give_label)
        # Cache the ids as an array so fetching a sample does not build a
        # whole row Series on every access.
        self.image_ids = self.df['image_id'].values

        if self.give_label:
            self.df['label'] = self.df['label'].astype(int)
            self.labels = self.df['label'].values

    def __len__(self):
        """Function to return the number of records in the dataset."""
        return len(self.df)

    def __getitem__(self, index):
        """Function to return the sample stored at position ``index``.

        Returns:
            ``(img, target)`` when labels are enabled, otherwise ``img`` alone.
            ``img`` is the float32 image after the optional transforms and
            ``target`` is the label wrapped in a tensor.
        """
        # Look the label up first so an out-of-range index fails before any
        # disk access happens.
        if self.give_label:
            target = torch.tensor(self.labels[index])

        # Load the image; pixel values are converted to float32 but not rescaled.
        image_path = f'{self.data_root}/{self.image_ids[index]}.jpg'
        img = load_img(image_path).astype(np.float32)

        # Transform images
        if self.transforms:
            img = self.transforms(image=img)['image']

        if self.give_label:
            return img, target
        return img

def get_labels(path, give_label):
    """Read image ids (and optionally labels) from a CSV file with a header row.

    The first column of every data row is taken as the image id and, when
    ``give_label`` is truthy, the second column as the label. Values are
    returned as the raw strings found in the file.

    Args:
        path: Path of the CSV file to read.
        give_label: Whether to also collect the second column.

    Returns:
        ``(list_id, list_label)`` when ``give_label`` is truthy, otherwise
        just ``list_id``. An empty or header-only file yields empty lists.
    """
    list_id = []
    list_label = []
    # newline='' is the csv module's recommended way to open files so that
    # newlines embedded in quoted fields are handled by the reader itself.
    with open(path, mode='r', newline='') as file:
        reader = csv.reader(file)
        # Drop the header row; the default keeps an empty file from raising.
        next(reader, None)
        for row in reader:
            # csv.reader yields [] for blank lines; there is nothing to record.
            if not row:
                continue
            list_id.append(row[0])
            if give_label:
                list_label.append(row[1])
    if give_label:
        return list_id, list_label
    return list_id

# Dataset location and input-pipeline settings.
main_dir = "/kaggle/input/dat18seefood"
batch_size = 32
image_size = 256


items = os.listdir(main_dir)
train_id = []
train_label = []

test_id = []
test_label = []

for item in items:
    # Build the path for the file being examined. Previously `path` was only
    # assigned in the train.csv branch, so test.csv was read from the train
    # file (or raised NameError), depending on the order os.listdir returned.
    path = os.path.join(main_dir, item)
    if item == "train.csv":
        train_id, train_label = get_labels(path, give_label=True)
    elif item == "test.csv":
        test_id = get_labels(path, give_label=False)


# Hold out 20% of the labelled ids for validation, keeping the class
# proportions equal in both parts (stratify) and the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(train_id, train_label, stratify=train_label, test_size=0.20, random_state=42)
print(len(X_val))
print(len(X_train))
df_train = pd.DataFrame({
    'image_id': X_train,
    'label': y_train
})

df_val = pd.DataFrame({
    'image_id': X_val,
    'label': y_val
})

df_test = pd.DataFrame({
    'image_id': test_id,
})

# Show the label-id -> label-name table and the per-class training counts.
df = pd.read_csv(main_dir + '/labelnames.csv')
print(df)
print(df_train["label"].value_counts())

def _resize_then_tensor(*augmentations):
    """Compose a pipeline: resize to the square input size, apply the given
    augmentations in order, then convert the image to a tensor."""
    return A.Compose([
        A.Resize(image_size, image_size, p=1),
        *augmentations,
        ToTensorV2(p=1.0),
    ])


# One pipeline per split. Only the training split gets random augmentation;
# validation and test images are just resized and converted.
image_transforms = {
    'train': _resize_then_tensor(
        A.HorizontalFlip(p=0.5),
        A.ShiftScaleRotate(p=0.5),
        A.CoarseDropout(p=0.5),
    ),
    'valid': _resize_then_tensor(),
    'test': _resize_then_tensor(),
}


# Datasets for the three splits. Train and validation images share the
# train/ directory; only the test split has no labels.
train_dataset = CustomDataset(df_train, main_dir+"/train/",transforms=image_transforms["train"],give_label=True)
valid_dataset = CustomDataset(df_val, main_dir+"/train/",transforms=image_transforms["valid"],give_label=True)
test_dataset = CustomDataset(df_test, main_dir+"/test/",transforms=image_transforms["test"],give_label=False)

# Only the training loader is shuffled. The test loader must keep dataset
# order so that the i-th output lines up with the i-th row of df_test, and
# shuffling validation data serves no purpose.
dataloaders = {
    'train': DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
    'valid': DataLoader(valid_dataset, batch_size=batch_size, shuffle=False),
    'test': DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
}

# Sanity check: pull one training batch and print the tensor shapes.
trainiter = iter(dataloaders['train'])
features, labels = next(trainiter)
print(features.shape, labels.shape)

# Size the classifier head from the data instead of hard-coding it. Labels
# are 0-based ids, so the head needs (largest id + 1) outputs; the previous
# fixed value of 100 was one short for a label set running from 0 to 100.
num_classes = max(int(label) for label in train_label) + 1
model = torch.hub.load('hankyul2/EfficientNetV2-pytorch', 'efficientnet_v2_s', pretrained=True, nclass=num_classes)
# print(model)

# Early-stopping state: stop once validation loss has failed to improve for
# max_epochs_stop consecutive epochs.
epochs_no_improve = 0
valid_loss_min = np.inf  # np.Inf was removed in NumPy 2.0
max_epochs_stop = 3

valid_max_acc = 0
history = []
overall_start = timer()
# Name keeps its original spelling because the optimizer setup reads it.
learing_rate = 0.001

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using {device} device')

# Load model, loss function, and optimizing algorithm
model = model.to(device)
loss_fn = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learing_rate)
train_loader = dataloaders["train"]
valid_loader = dataloaders["valid"]
history = []

# File that receives the weights of the best epoch so far (relative to the
# current working directory).
save_file_name = 'efficientnet_v2_s_best.pt'
# Defaults so the reporting code never hits an undefined name, even if no
# epoch ever improves on the initial loss (e.g. the loss is NaN).
best_epoch = 0
valid_best_acc = 0.0

# Start training
epochs = 10
for epoch in range(epochs):
    time_start = time.time()
    print(f'==========Epoch {epoch+1} Start Training==========')
    model.train()

    # Running sums over the epoch: loss weighted by batch size and the
    # number of correct predictions. Both are averaged after the epoch.
    train_loss = 0.0
    valid_loss = 0.0

    train_acc = 0
    valid_acc = 0

    start = timer()
    pbar = tqdm(enumerate(train_loader), total=len(train_loader))
    for step, (img, label) in pbar:
        img = img.to(device).float()
        label = label.to(device).long()

        output = model(img)
        loss = loss_fn(output, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # loss is the batch mean, so weight it by the batch size.
        train_loss += loss.item() * img.size(0)
        # The predicted class is the index of the largest logit.
        _, pred = torch.max(output, dim=1)
        train_acc += (pred == label).sum().item()

        # Track training progress
        print(f'Epoch: {epoch}\t{100 * (step + 1) / len(train_loader):.2f}% complete. {timer() - start:.2f} seconds elapsed in epoch.',end='\r')

    # Count completed epochs on the model object. The attribute does not
    # exist on a freshly loaded model, so start from zero.
    model.epochs = getattr(model, 'epochs', 0) + 1

    model.eval()
    with torch.no_grad():
        pbar = tqdm(enumerate(valid_loader), total=len(valid_loader))
        for step, (img, label) in pbar:
            img = img.to(device).float()
            label = label.to(device).long()

            output = model(img)
            loss = loss_fn(output, label)

            valid_loss += loss.item() * img.size(0)
            # Calculate validation accuracy
            _, pred = torch.max(output, dim=1)
            valid_acc += (pred == label).sum().item()

    # Calculate average loss
    train_loss = train_loss / len(train_loader.dataset)
    valid_loss = valid_loss / len(valid_loader.dataset)

    # Calculate average accuracy
    train_acc = train_acc / len(train_loader.dataset)
    valid_acc = valid_acc / len(valid_loader.dataset)

    history.append([train_loss, valid_loss, train_acc, valid_acc])
    print(f'\nEpoch: {epoch} \tTraining Loss: {train_loss:.4f} \tValidation Loss: {valid_loss:.4f}')
    print(f'\t\tTraining Accuracy: {100 * train_acc:.2f}%\t Validation Accuracy: {100 * valid_acc:.2f}%')

    if valid_loss < valid_loss_min:
        # Save model
        torch.save(model.state_dict(), save_file_name)
        # Track improvement
        epochs_no_improve = 0
        valid_loss_min = valid_loss
        valid_best_acc = valid_acc
        best_epoch = epoch
    else:
        epochs_no_improve += 1
        # Trigger early stopping
        if epochs_no_improve >= max_epochs_stop:
            # Report the accuracy of the best epoch, not of the current one.
            print(
                f'\nEarly Stopping! Total epochs: {epoch}. Best epoch: {best_epoch} with loss: {valid_loss_min:.2f} and acc: {100 * valid_best_acc:.2f}%'
            )
            total_time = timer() - overall_start
            print(
                f'{total_time:.2f} total seconds elapsed. {total_time / (epoch+1):.2f} seconds per epoch.'
            )
            # NOTE(review): the model still holds the weights of the last
            # epoch here; the best ones are stored in save_file_name.
            break

    # print results from this epoch
    exec_t = int((time.time() - time_start)/60)
    print(
        f'Epoch : {epoch+1} - loss : {train_loss:.4f} - acc: {train_acc:.4f} / Exec time {exec_t} min\n'
    )

# Attach the optimizer
model.optimizer = optimizer
# Record overall time and print out stats
total_time = timer() - overall_start
# Report the accuracy recorded for the best epoch; valid_acc holds the value
# of whichever epoch ran last.
print(
    f'\nBest epoch: {best_epoch} with loss: {valid_loss_min:.2f} and acc: {100 * valid_best_acc:.2f}%'
)
# epoch is the 0-based index of the last epoch that ran, so epoch + 1 epochs
# were executed (dividing by epoch alone fails when only one epoch ran).
print(
    f'{total_time:.2f} total seconds elapsed. {total_time / (epoch + 1):.2f} seconds per epoch.'
)
# Format history
history = pd.DataFrame(
    history,
    columns=['train_loss', 'valid_loss', 'train_acc', 'valid_acc'])



# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

15150
60600
     label       labelname
0        0       Apple pie
1        1  Baby back ribs
2        2         Baklava
3        3  Beef carpaccio
4        4    Beef tartare
..     ...             ...
96      96           Tacos
97      97        Takoyaki
98      98        Tiramisu
99      99    Tuna tartare
100    100         Waffles

[101 rows x 2 columns]
label
8     600
15    600
58    600
35    600
75    600
     ... 
5     600
64    600
72    600
95    600
53    600
Name: count, Length: 101, dtype: int64
torch.Size([32, 3, 256, 256]) torch.Size([32])


Downloading: "https://github.com/hankyul2/EfficientNetV2-pytorch/zipball/main" to /root/.cache/torch/hub/main.zip
will be placed in the single file you specified.

--2024-05-21 08:18:42--  https://github.com/hankyul2/EfficientNetV2-pytorch/releases/download/EfficientNetV2-pytorch/efficientnetv2-s.npy
Resolving github.com (github.com)... 140.82.116.3
Connecting to github.com (github.com)|140.82.116.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/426838056/4a12c9d8-491b-4719-98b2-0d88087a2e5b?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20240521%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240521T081842Z&X-Amz-Expires=300&X-Amz-Signature=6f528ec0562694c77a40c6d09d75ee289ba9c6473fcccd15d2002440ece38b03&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=426838056&response-content-disposition=attachment%3B%20filename%3Defficientnetv2-s.npy&response-co

Using cpu device


  0%|          | 0/1894 [00:00<?, ?it/s]

Epoch: 0	0.16% complete. 60.15 seconds elapsed in epoch.

In [None]:
# Scratch cell: prints a fixed message.
print("JD Disa zwisa")

In [1]:
import torch

# Sanity check: report whether PyTorch can see a CUDA device in this session.
print(torch.cuda.is_available())

True
