In [1]:
import numpy as np 
import pandas as pd

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

/kaggle/input/data-cv-224/data224x224/open_eyes/kaggle_flickrfaceshq-dataset-ffhq_35407_eye_LEFT.png
/kaggle/input/data-cv-224/data224x224/open_eyes/kaggle_flickrfaceshq-dataset-ffhq_53655_eye_RIGHT.png
/kaggle/input/data-cv-224/data224x224/open_eyes/kaggle_flickrfaceshq-dataset-ffhq_44218_eye_LEFT.png
/kaggle/input/data-cv-224/data224x224/open_eyes/kaggle_flickrfaceshq-dataset-ffhq_28783_eye_LEFT.png
/kaggle/input/data-cv-224/data224x224/open_eyes/kaggle_flickrfaceshq-dataset-ffhq_31707_eye_RIGHT.png
/kaggle/input/data-cv-224/data224x224/open_eyes/kaggle_flickrfaceshq-dataset-ffhq_36150_eye_LEFT.png
/kaggle/input/data-cv-224/data224x224/open_eyes/kaggle_flickrfaceshq-dataset-ffhq_37192_eye_RIGHT.png
/kaggle/input/data-cv-224/data224x224/open_eyes/kaggle_flickrfaceshq-dataset-ffhq_31024_eye_RIGHT.png
/kaggle/input/data-cv-224/data224x224/open_eyes/kaggle_flickrfaceshq-dataset-ffhq_21910_eye_LEFT.png
/kaggle/input/data-cv-224/data224x224/open_eyes/kaggle_flickrfaceshq-dataset-ffhq_50510

In [2]:
import os
import torch
import numpy as np
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt


In [3]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Dataset location (Kaggle environment); the data must be uploaded/unzipped here.
# NOTE(review): the file listing printed by the first cell shows
# /kaggle/input/data-cv-224/..., not data-cv-01 — confirm this is the intended dataset.
data_dir = "/kaggle/input/data-cv-01/data"

# Seed for the train/validation split so the same partition is produced on
# every run; without it the validation set changes each time the cell is run.
SPLIT_SEED = 42

# Image preprocessing: resize to 224x224, convert to a tensor, then normalize
# with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Load the dataset; class labels come from the sub-folder names of data_dir
dataset = ImageFolder(data_dir, transform=transform)

# Split into train and validation subsets (80/20), reproducibly
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Only the training loader is shuffled; validation order is kept fixed
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)

Device: cuda


In [4]:
# Report how many samples each split contains
train_count = len(train_loader.dataset)
val_count = len(val_loader.dataset)
print(f"Train dataset size: {train_count}")
print(f"Validation dataset size: {val_count}")

Train dataset size: 9242
Validation dataset size: 2311


In [5]:
class SimpleCNN(nn.Module):
    """Small two-stage convolutional classifier for 3-channel images.

    Architecture: two (3x3 conv -> ReLU -> 2x2 max-pool) stages, a flatten,
    a 128-unit hidden layer with ReLU and dropout, and a final linear layer
    that outputs one logit per class.

    Args:
        num_classes: Number of output logits. When ``None`` (the default) it
            is read from the module-level ``dataset.classes``, so the
            zero-argument call ``SimpleCNN()`` behaves as before.
        input_size: Spatial size of the input images, either a single int
            (square images) or a ``(height, width)`` pair. Defaults to 224,
            which gives the original ``64 * 56 * 56`` flattened size.

    Raises:
        ValueError: If ``input_size`` is too small to survive the two
            2x2 pooling stages.
    """

    def __init__(self, num_classes=None, input_size=224):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: infer the class count from the dataset
            num_classes = len(dataset.classes)
        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        # Each 2x2 max-pool halves the spatial size (rounding down); the padded
        # 3x3 convolutions keep it unchanged.
        pooled_h = height // 2 // 2
        pooled_w = width // 2 // 2
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small for two 2x2 pooling stages"
            )

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)  # 3 input channels (colour images)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.3)
        self.fc1 = nn.Linear(64 * pooled_h * pooled_w, 128)  # flattened size after the two pools
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch of images."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [6]:
# Build the network on the selected device, then its loss function and optimizer
model = SimpleCNN()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

# Train the model for one epoch
def train_one_epoch(model, loader):
    """Run one optimization pass over ``loader`` and return epoch metrics.

    Uses the module-level ``device``, ``criterion`` and ``optimizer``.

    Args:
        model: Network to train; it is put into training mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        ``(mean_loss, accuracy)``, both averaged per sample over the batches
        actually processed.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()
    loss_sum, correct, seen = 0.0, 0, 0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        batch_size = labels.size(0)
        # Assumes criterion returns the batch mean (the nn.CrossEntropyLoss()
        # default): weighting by batch size keeps a smaller final batch from
        # being over-represented in the epoch average.
        loss_sum += loss.item() * batch_size
        correct += (outputs.argmax(1) == labels).sum().item()
        seen += batch_size
    return loss_sum / seen, correct / seen

# Evaluate the model
def evaluate(model, loader):
    """Compute loss and accuracy of ``model`` over ``loader`` without training.

    Uses the module-level ``device`` and ``criterion``. Gradients are
    disabled and the model is put into evaluation mode.

    Args:
        model: Network to evaluate.
        loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        ``(mean_loss, accuracy)``, both averaged per sample over the batches
        actually processed.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()
    loss_sum, correct, seen = 0.0, 0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            batch_size = labels.size(0)
            # Assumes criterion returns the batch mean (the
            # nn.CrossEntropyLoss() default): weighting by batch size keeps a
            # smaller final batch from skewing the average.
            loss_sum += loss.item() * batch_size
            correct += (outputs.argmax(1) == labels).sum().item()
            seen += batch_size
    return loss_sum / seen, correct / seen

# Training loop: one training pass and one validation pass per epoch,
# printing the metrics of both after every epoch
epochs = 30
for epoch in range(epochs):
    train_loss, train_acc = train_one_epoch(model, train_loader)
    val_loss, val_acc = evaluate(model, val_loader)
    print(
        f"Epoch {epoch+1}/{epochs}",
        f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}",
        f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}",
        sep="\n",
    )

Epoch 1/30
Train Loss: 0.5510, Train Acc: 0.7971
Val Loss: 0.3198, Val Acc: 0.8892
Epoch 2/30
Train Loss: 0.3259, Train Acc: 0.8808
Val Loss: 0.2752, Val Acc: 0.9061
Epoch 3/30
Train Loss: 0.2708, Train Acc: 0.8988
Val Loss: 0.2361, Val Acc: 0.9143
Epoch 4/30
Train Loss: 0.2393, Train Acc: 0.9116
Val Loss: 0.2272, Val Acc: 0.9169
Epoch 5/30
Train Loss: 0.2106, Train Acc: 0.9256
Val Loss: 0.2061, Val Acc: 0.9247
Epoch 6/30
Train Loss: 0.1907, Train Acc: 0.9320
Val Loss: 0.2417, Val Acc: 0.9074
Epoch 7/30
Train Loss: 0.1668, Train Acc: 0.9393
Val Loss: 0.2140, Val Acc: 0.9221
Epoch 8/30
Train Loss: 0.1526, Train Acc: 0.9443
Val Loss: 0.2098, Val Acc: 0.9217
Epoch 9/30
Train Loss: 0.1298, Train Acc: 0.9559
Val Loss: 0.1887, Val Acc: 0.9342
Epoch 10/30
Train Loss: 0.1160, Train Acc: 0.9593
Val Loss: 0.1993, Val Acc: 0.9234
Epoch 11/30
Train Loss: 0.1053, Train Acc: 0.9620
Val Loss: 0.1962, Val Acc: 0.9308
Epoch 12/30
Train Loss: 0.0934, Train Acc: 0.9686
Val Loss: 0.1872, Val Acc: 0.9364
E

In [7]:
# Persist only the learned parameters (the state dict) to the Kaggle working directory
trained_weights = model.state_dict()
torch.save(trained_weights, "/kaggle/working/cnn_model_02.pth")