In [None]:
import os
import shutil
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
import pandas as pd

# =======================
# 1. NORMALIZE THE DATA LAYOUT
# =======================
def prepare_imagefolder_structure(src_dir, dst_dir):
    """Copy labelled images from a flat folder into a per-class folder tree.

    Creates ``dst_dir`` with one sub-folder per digit class (``0`` .. ``9``),
    then copies every ``.png`` / ``.jpg`` / ``.jpeg`` file (extension matched
    case-insensitively) found directly in ``src_dir`` into the sub-folder
    named by the part of its file name before the first ``_``
    (e.g. ``3_0001.png`` goes to ``dst_dir/3/3_0001.png``).

    Files are skipped when:
      * the label part is empty, ``.`` or ``..`` (these would resolve to
        ``dst_dir`` itself or its parent instead of a class folder);
      * no sub-folder named after the label exists in ``dst_dir``;
      * the copy fails with an ``OSError`` (a message is printed).

    Args:
        src_dir: Folder containing the flat, label-prefixed image files.
        dst_dir: Root of the per-class folder tree to create and fill.
    """
    os.makedirs(dst_dir, exist_ok=True)
    for digit in range(10):
        os.makedirs(os.path.join(dst_dir, str(digit)), exist_ok=True)

    for file in os.listdir(src_dir):
        if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        label = file.split('_')[0]
        # os.path.join(dst_dir, "") is dst_dir itself, and "." / ".." point
        # at dst_dir / its parent; all three pass the isdir() check below,
        # so they must be rejected explicitly.
        if label in ('', '.', '..'):
            continue

        label_folder = os.path.join(dst_dir, label)
        if not os.path.isdir(label_folder):
            continue

        # Only the copy itself can fail; keep going on a per-file error but
        # report it instead of swallowing it silently.
        try:
            shutil.copy(
                os.path.join(src_dir, file),
                os.path.join(label_folder, file)
            )
        except OSError as err:
            print(f"Bỏ qua {file}: {err}")
    print(f" Đã chuẩn hóa dữ liệu sang ImageFolder tại {dst_dir}")

# =======================
# 2. RESNET18 TRAINING FUNCTION
# =======================
def train_resnet18(data_dir, num_epochs=10, batch_size=32):
    """Fine-tune a pretrained ResNet-18 as a 10-class classifier.

    Loads ``data_dir`` with ``datasets.ImageFolder``, randomly splits it
    80 % / 20 % into training and validation subsets, replaces the network's
    final fully connected layer with a 10-output linear layer and trains all
    parameters with Adam (lr=1e-4) and cross-entropy loss.

    After each epoch the loss summed over all training batches is printed;
    after training the accuracy on the validation subset is printed.

    Args:
        data_dir: Root folder in the one-sub-folder-per-class layout.
        num_epochs: Number of passes over the training subset.
        batch_size: Batch size used by both data loaders.

    Returns:
        The trained model, left in eval mode on the selected device.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Resize to 224x224 and normalise with the usual ImageNet channel
    # statistics expected by the pretrained torchvision weights.
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    full_set = datasets.ImageFolder(root=data_dir, transform=preprocess)
    n_train = int(0.8 * len(full_set))
    n_val = len(full_set) - n_train
    train_subset, val_subset = torch.utils.data.random_split(
        full_set, [n_train, n_val]
    )

    # Only the training loader shuffles; validation order is irrelevant.
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size)

    net = models.resnet18(pretrained=True)
    net.fc = nn.Linear(net.fc.in_features, 10)
    net = net.to(device)

    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(net.parameters(), lr=1e-4)

    for epoch_no in range(1, num_epochs + 1):
        net.train()
        running_loss = 0
        for batch, targets in train_loader:
            batch = batch.to(device)
            targets = targets.to(device)

            adam.zero_grad()
            batch_loss = loss_fn(net(batch), targets)
            batch_loss.backward()
            adam.step()

            running_loss += batch_loss.item()

        print(f"[Epoch {epoch_no}] Loss: {running_loss:.4f}")

    # Evaluation on the held-out subset
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch, targets in val_loader:
            batch = batch.to(device)
            targets = targets.to(device)
            preds = torch.max(net(batch), 1)[1]
            correct += (preds == targets).sum().item()
            total += targets.size(0)

    print(f"🎯 Accuracy: {correct / total * 100:.2f}%")
    return net

# =======================
# 3. PREDICT ON NEW IMAGES
# =======================
def predict_on_folder(model, input_dir, output_csv="resnet_predictions.csv"):
    """Classify every image in a folder and write the predictions to a CSV.

    Each ``.jpg`` / ``.png`` / ``.jpeg`` file (extension matched
    case-insensitively) directly inside ``input_dir`` is resized to 224x224,
    normalised, passed through ``model`` and the index of the largest output
    is recorded. The result is written to ``output_csv`` with the columns
    ``filename`` and ``predicted_digit``.

    Files that cannot be opened or preprocessed are skipped with a message;
    errors raised by the model itself are not suppressed, since they would
    affect every file.

    Args:
        model: Trained network; it is switched to eval mode and moved to the
            GPU when one is available, otherwise the CPU.
        input_dir: Folder containing the images to classify.
        output_csv: Path of the CSV file to write.
    """
    # Imported once per call rather than once per image.
    from PIL import Image

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    model = model.to(device)

    results = []
    # Inference only: without no_grad() every forward pass would record an
    # autograd graph that is never used.
    with torch.no_grad():
        for file in tqdm(os.listdir(input_dir)):
            if not file.lower().endswith(('.jpg', '.png', '.jpeg')):
                continue

            img_path = os.path.join(input_dir, file)
            # Deliberate per-file best-effort boundary: one unreadable image
            # must not abort the whole run, but the skip is reported.
            try:
                with Image.open(img_path) as raw:
                    img = transform(raw.convert('RGB'))
            except Exception as err:
                # tqdm.write keeps the message from breaking the progress bar.
                tqdm.write(f"Bỏ qua {file}: {err}")
                continue

            output = model(img.unsqueeze(0).to(device))
            _, pred = torch.max(output, 1)
            results.append([file, pred.item()])

    df = pd.DataFrame(results, columns=["filename", "predicted_digit"])
    df.to_csv(output_csv, index=False)
    print(f"✅ Đã lưu kết quả vào {output_csv}")

# =======================
# 4. MAIN PIPELINE
# =======================
def main():
    """Run the whole pipeline: restructure the data, train, then predict.

    1. Copies the labelled images from ``merged_data`` into the per-class
       folder tree ``resnet_data``.
    2. Trains ResNet-18 on ``resnet_data`` with the default settings.
    3. Predicts the images in ``unlabeled_dir`` and writes the default CSV.
    """
    # Raw strings already keep backslashes literal, so each separator is
    # written once; doubling them inside r"" yields paths containing "\\".
    merged_data = r"D:\code_things\do an cuoi ki mon may hoc\hand_Written(CNN)\merged_data"
    resnet_data = r"D:\code_things\do an cuoi ki mon may hoc\hand_Written(CNN)\resnet_data"
    unlabeled_dir = r"D:\code_things\do an cuoi ki mon may hoc\hand_Written(CNN)\data no label\data.2025"

    prepare_imagefolder_structure(merged_data, resnet_data)
    model = train_resnet18(resnet_data)
    predict_on_folder(model, unlabeled_dir)

if __name__ == "__main__":
    main()

✅ Đã chuẩn hóa dữ liệu sang ImageFolder tại D:\\code_things\\do an cuoi ki mon may hoc\\hand_Written(CNN)\\resnet_data




Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\reald/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:02<00:00, 21.4MB/s]


[Epoch 1] Loss: 122.1108
[Epoch 2] Loss: 27.1797
[Epoch 3] Loss: 12.0833
[Epoch 4] Loss: 10.2606
[Epoch 5] Loss: 7.3587
[Epoch 6] Loss: 6.1355
[Epoch 7] Loss: 6.1116
[Epoch 8] Loss: 7.4025
[Epoch 9] Loss: 11.3274
[Epoch 10] Loss: 12.0810
🎯 Accuracy: 94.67%


100%|██████████| 9998/9998 [12:06<00:00, 13.75it/s]


✅ Đã lưu kết quả vào resnet_predictions.csv
