In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Model VGG16

In [42]:
import os
from PIL import Image
from torch.utils.data import Dataset
import torch
import pandas as pd
from torchvision import transforms
from torch.utils.data import random_split, DataLoader
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim

In [55]:
class ImageDataset(Dataset):
    """Image dataset driven by a CSV file.

    Column 1 of the CSV holds the image path relative to ``root_dir``.
    Column 0 is used differently depending on the mode:

    * training mode (``is_test=False``): the cell is split on ``_`` and its
      second field is parsed as a 1-based class number, which is returned as
      a 0-based label tensor;
    * test mode (``is_test=True``): the raw cell value is returned unchanged.

    Args:
        csv_file: Path of the CSV file to read with ``pd.read_csv``.
        root_dir: Directory that the paths in column 1 are joined onto.
        transform: Optional callable applied to the RGB PIL image.
        is_test: Selects test mode (see above).
    """

    def __init__(self, csv_file, root_dir, transform=None, is_test=False):
        self.root_dir = root_dir
        self.transform = transform
        self.is_test = is_test
        self.data = pd.read_csv(csv_file)

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        ``target`` is a 0-based label tensor in training mode and the raw
        value of column 0 in test mode.
        """
        # Both modes read the image the same way: path from column 1, forced to RGB.
        picture = Image.open(
            os.path.join(self.root_dir, self.data.iloc[idx, 1])
        ).convert('RGB')

        if self.is_test:
            target = self.data.iloc[idx, 0]
        else:
            # Second '_'-separated field is the 1-based class number; shift to 0-based.
            class_number = int(self.data.iloc[idx, 0].split('_')[1])
            target = torch.tensor(class_number - 1)

        if self.transform:
            picture = self.transform(picture)
        return picture, target

In [56]:
# Per-channel mean / std used for normalisation (the standard ImageNet statistics).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Preprocessing shared by the train and test images:
# resize to 224x224, convert to a tensor, then normalise each channel.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
]
transform = transforms.Compose(preprocessing_steps)

# Locations of the competition files.
train_csv_path = '/kaggle/input/dl-63-cw-image-classification/train.csv'
train_image_dir = '/kaggle/input/dl-63-cw-image-classification/train'
test_csv_path = '/kaggle/input/dl-63-cw-image-classification/test.csv'
test_image_dir = '/kaggle/input/dl-63-cw-image-classification/test/'

# Labelled dataset (yields image + label tensor).
train_dataset = ImageDataset(
    csv_file=train_csv_path,
    root_dir=train_image_dir,
    transform=transform,
)

# Unlabelled dataset (yields image + value of the first CSV column).
test_dataset = ImageDataset(
    csv_file=test_csv_path,
    root_dir=test_image_dir,
    transform=transform,
    is_test=True,
)

In [57]:
# Hold out 20% of the labelled data for validation.
train_size = int(0.8 * len(train_dataset))
valid_size = len(train_dataset) - train_size

# Seed the split so the same images land in the validation set on every run;
# without this, validation accuracy is not comparable across kernel restarts.
split_generator = torch.Generator().manual_seed(42)
trainset, validset = random_split(
    train_dataset, [train_size, valid_size], generator=split_generator
)

# Create DataLoaders (only the training loader is shuffled)
batch_size = 32
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(validset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [7]:
# class TestDataset(Dataset):
#     def __init__(self, img_paths, transform=None):
#         self.img_paths = img_paths
#         self.transform = transform

#     def __len__(self):
#         return len(self.img_paths)

#     def __getitem__(self, idx):
#         img_path = self.img_paths[idx]
#         image = Image.open(img_path).convert('RGB')

#         if self.transform:
#             image = self.transform(image)

#         return image

# # Load test data
# test_df = pd.read_csv('/kaggle/input/dl-63-cw-image-classification/test.csv')
# image_paths = test_df['filepaths'].values
# image_ids = test_df['ID'].values
# base_path = '/kaggle/input/dl-63-cw-image-classification/test/'
# image_paths = [os.path.join(base_path, path) for path in image_paths]

# testset = TestDataset(img_paths=image_paths, transform=transform)
# testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

In [46]:
class CustomVGG16(nn.Module):
    """Pre-trained VGG16 with frozen feature layers and a new classifier head.

    The original VGG16 classifier is replaced by
    ``Linear(25088, 256) -> ReLU -> Dropout(0.5) -> Linear(256, num_classes)``.
    Only the parameters of ``features`` are frozen; the new head is trainable.

    Args:
        num_classes: Size of the output layer (default 71).
    """

    def __init__(self, num_classes=71):
        super().__init__()

        # NOTE(review): newer torchvision releases deprecate `pretrained=True` in
        # favour of a `weights=` argument — confirm against the installed version
        # before changing this call.
        backbone = models.vgg16(pretrained=True)

        # Freeze the feature-extraction layers only; the classifier stays trainable.
        for weight in backbone.features.parameters():
            weight.requires_grad_(False)

        # New head. 25088 = 512 * 7 * 7, the flattened input size the stock
        # VGG16 classifier also expects.
        backbone.classifier = nn.Sequential(
            nn.Linear(25088, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )
        self.base_model = backbone

    def forward(self, x):
        """Run features -> avgpool -> flatten -> classifier and return the logits."""
        backbone = self.base_model
        pooled = backbone.avgpool(backbone.features(x))
        flat = torch.flatten(pooled, 1)
        return backbone.classifier(flat)

In [47]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = CustomVGG16(num_classes=71)
model = model.to(device)

# Hand the optimizer only the parameters that are not frozen.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.RMSprop(trainable_params, lr=0.001)
criterion = nn.CrossEntropyLoss()

def train_and_val_model(model, criterion, optimizer, train_loader, val_loader,
                        num_epochs=25, device=None, save_path='best_model.pth'):
    """Train ``model`` and measure validation accuracy after every epoch.

    Whenever the validation accuracy beats the best value seen during this
    call, the model's ``state_dict`` is written to ``save_path``.

    Args:
        model: Module to train; updated in place.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to. When ``None`` it is taken
            from the model's first parameter (CPU if the model has none), so
            the function no longer depends on a global ``device`` variable.
        save_path: Destination of the best checkpoint.

    Returns:
        The best validation accuracy of this call as a float in ``[0, 1]``
        (``0.0`` if no epoch scored above zero, in which case nothing is saved).
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    best_val_acc = 0.0
    for epoch in range(num_epochs):
        model.train()  # Set to training mode
        # Plain Python accumulators: they stay valid even when a loader yields
        # no batches (a tensor-only `.double()` call would fail on the initial 0).
        running_loss, running_corrects, train_seen = 0.0, 0, 0

        # Training loop
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            _, preds = torch.max(outputs, 1)
            batch_len = inputs.size(0)
            # Weight the batch loss by batch size so the epoch value is a per-sample mean.
            running_loss += loss.item() * batch_len
            running_corrects += (preds == labels).sum().item()
            train_seen += batch_len

        # max(..., 1) avoids a ZeroDivisionError for an empty loader.
        epoch_loss = running_loss / max(train_seen, 1)
        epoch_acc = running_corrects / max(train_seen, 1)
        print(f'Training - Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

        # Validation loop
        model.eval()  # Set to evaluation mode
        val_corrects, val_seen = 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                val_corrects += (preds == labels).sum().item()
                val_seen += inputs.size(0)

        val_acc = val_corrects / max(val_seen, 1)
        print(f'Validation Accuracy: {val_acc:.4f}')

        # Save the best model based on validation accuracy
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), save_path)

    return best_val_acc




In [51]:
# Stage 1: 10 epochs with the optimizer built above (only non-frozen parameters).
train_and_val_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=10,
)

Training - Epoch 1/10, Loss: 4.2668, Accuracy: 0.2391
Validation Accuracy: 0.5742
Training - Epoch 2/10, Loss: 2.1124, Accuracy: 0.4520
Validation Accuracy: 0.7208
Training - Epoch 3/10, Loss: 1.6236, Accuracy: 0.5615
Validation Accuracy: 0.7541
Training - Epoch 4/10, Loss: 1.3063, Accuracy: 0.6430
Validation Accuracy: 0.7830
Training - Epoch 5/10, Loss: 1.1402, Accuracy: 0.6825
Validation Accuracy: 0.7836
Training - Epoch 6/10, Loss: 1.0280, Accuracy: 0.7122
Validation Accuracy: 0.8006
Training - Epoch 7/10, Loss: 0.9986, Accuracy: 0.7346
Validation Accuracy: 0.8233
Training - Epoch 8/10, Loss: 0.8942, Accuracy: 0.7576
Validation Accuracy: 0.8082
Training - Epoch 9/10, Loss: 0.8404, Accuracy: 0.7786
Validation Accuracy: 0.8220
Training - Epoch 10/10, Loss: 0.7864, Accuracy: 0.7881
Validation Accuracy: 0.8226


In [52]:
# Stage 2: make the feature layers from index 15 onwards trainable again.
for weight in model.base_model.features[15:].parameters():
    weight.requires_grad_(True)

# A fresh optimizer is required so the newly unfrozen weights are updated (plain SGD, lr 0.001).
optimizer = optim.SGD(params=model.parameters(), lr=0.001)

# NOTE(review): train_and_val_model restarts its best-accuracy tracker at 0.0, so this
# call overwrites the 'best_model.pth' written during stage 1 as soon as one epoch
# scores above zero.
train_and_val_model(
    model,
    criterion,
    optimizer,
    train_loader,
    val_loader,
    num_epochs=10,
)

Training - Epoch 1/10, Loss: 0.5686, Accuracy: 0.8320
Validation Accuracy: 0.8472
Training - Epoch 2/10, Loss: 0.5264, Accuracy: 0.8309
Validation Accuracy: 0.8484
Training - Epoch 3/10, Loss: 0.4329, Accuracy: 0.8614
Validation Accuracy: 0.8648
Training - Epoch 4/10, Loss: 0.4287, Accuracy: 0.8567
Validation Accuracy: 0.8560
Training - Epoch 5/10, Loss: 0.4017, Accuracy: 0.8715
Validation Accuracy: 0.8635
Training - Epoch 6/10, Loss: 0.3914, Accuracy: 0.8693
Validation Accuracy: 0.8629
Training - Epoch 7/10, Loss: 0.3582, Accuracy: 0.8790
Validation Accuracy: 0.8698
Training - Epoch 8/10, Loss: 0.3769, Accuracy: 0.8752
Validation Accuracy: 0.8660
Training - Epoch 9/10, Loss: 0.3473, Accuracy: 0.8817
Validation Accuracy: 0.8673
Training - Epoch 10/10, Loss: 0.3533, Accuracy: 0.8817
Validation Accuracy: 0.8723


In [None]:
# model.eval()
# test_loss, correct_test = 0.0, 0
# with torch.no_grad():
#     for images, labels in test_loader:
#         images, labels = images.cuda(), labels.cuda()
#         outputs = model(images)
#         loss = criterion(outputs, labels)
#         test_loss += loss.item() * images.size(0)
#         _, predicted = torch.max(outputs.data, 1)
#         correct_test += (predicted == labels).sum().item()

# test_loss /= len(test_loader.dataset)
# test_accuracy = correct_test / len(test_loader.dataset)
# print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

In [66]:
def evaluate_model_on_test_set(model, test_loader, device, class_mapping):
    """Predict a class name for every sample yielded by ``test_loader``.

    Args:
        model: Module called as ``model(images)``; it is put in eval mode.
        test_loader: Iterable of ``(images, image_ids)`` batches.
        device: Device the image batches are moved to.
        class_mapping: Mapping from predicted class index (int) to class name.

    Returns:
        ``(ids, predictions)``: two lists of equal length, in loader order.
        ``ids`` always holds plain Python values: when a batch of IDs arrives
        as a tensor (which happens when a DataLoader collates numeric IDs) it
        is converted with ``tolist()`` instead of being stored as 0-dim tensors.
    """
    model.eval()
    predictions = []
    ids = []

    with torch.no_grad():
        for images, image_ids in test_loader:
            images = images.to(device)
            outputs = model(images)
            _, predicted_classes = torch.max(outputs, 1)

            # Convert predicted class indices to class names
            # (one tolist() per batch rather than one .item() per element).
            predictions.extend(class_mapping[index] for index in predicted_classes.tolist())

            # Keep the IDs supplied by the loader rather than positional indices.
            if torch.is_tensor(image_ids):
                ids.extend(image_ids.tolist())
            else:
                ids.extend(image_ids)

    return ids, predictions

# Map each 0-based class index i to the class name 'class_{i+1}' (indices 0..70).
class_mapping = dict(enumerate(f'class_{number}' for number in range(1, 72)))

In [67]:
# Run the model over the test loader and collect (IDs, predicted class names).
ids, predictions = evaluate_model_on_test_set(
    model=model,
    test_loader=test_loader,
    device=device,
    class_mapping=class_mapping,
)

In [70]:
# Replace the collected IDs with their position in the list (0, 1, 2, ...).
# NOTE(review): this discards the ID values read from test.csv — confirm that the
# submission really expects positional IDs rather than the originals.
renamed_ids = [position for position in range(len(ids))]

In [71]:
# Assemble the submission table: one row per test sample.
submission_columns = {'ID': renamed_ids, 'TARGET': predictions}
results = pd.DataFrame(submission_columns)

# Save the results to a CSV file
results.to_csv('submission1.csv', index=False)

In [72]:
# Display the submission table as this cell's output.
results

Unnamed: 0,ID,TARGET
0,0,class_58
1,1,class_58
2,2,class_7
3,3,class_17
4,4,class_71
...,...,...
1395,1395,class_69
1396,1396,class_51
1397,1397,class_65
1398,1398,class_46
