In [1]:
import kagglehub
path = adilshamim8_rock_paper_scissors_path = kagglehub.dataset_download('adilshamim8/rock-paper-scissors')

print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: /Users/cheng/.cache/kagglehub/datasets/adilshamim8/rock-paper-scissors/versions/1


In [2]:
import os
import pandas as pd
import torch
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim

In [7]:
# 設定資料路徑
DATA_DIR = os.path.join(path, 'train', 'train')
ANNOTATION_FILE = os.path.join(DATA_DIR, '_annotations.csv')

# 自訂 Dataset 類別
class RockPaperScissorsDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.img_labels)

    # def __getitem__(self, idx):
    #     img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
    #     image = Image.open(img_path).convert("RGB")
    #     label_str = self.img_labels.iloc[idx, 1]
    #     label_map = {'rock': 0, 'paper': 1, 'scissors': 2}
    #     label = label_map[label_str]
    #     if self.transform:
    #         image = self.transform(image)
    #     return image, label

    def __getitem__(self, idx):
        try:
            img_path = os.path.join(self.img_dir, self.img_labels.loc[idx, 'filename'])
            image = Image.open(img_path).convert("RGB")
            label_str = self.img_labels.loc[idx, 'class']
            label_map = {'Rock': 0, 'Paper': 1, 'Scissors': 2}
            label = label_map[label_str]
            if self.transform:
                image = self.transform(image)
            return image, label
        except KeyError as e:
            print(f"KeyError in __getitem__: {e}")
            print(f"Index value: {idx}")
            print(f"img_labels content:\n{self.img_labels.head()}")
            raise

In [8]:
# 圖像轉換與切分資料集
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

dataset = RockPaperScissorsDataset(ANNOTATION_FILE, DATA_DIR, transform=transform)
from torch.utils.data import Subset
import numpy as np

# Create indices for the dataset
indices = np.arange(len(dataset))
train_indices, val_indices = train_test_split(indices, test_size=0.2, random_state=42)

# Create subsets using the indices
train_set = Subset(dataset, train_indices)
val_set = Subset(dataset, val_indices)

train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32)

In [9]:
# 設定裝置與模型
device = torch.device('mps') # use 'mps' for MacOS, 'cuda' for Nvidia GPU, or 'cpu'
model = models.resnext50_32x4d(weights=models.ResNeXt50_32X4D_Weights.IMAGENET1K_V2)
model.fc = nn.Linear(model.fc.in_features, 3)
model = model.to(device)

# 設定損失與優化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# 訓練模型
EPOCHS = 5
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {running_loss/len(train_loader):.4f}")

Epoch 1/5, Loss: 0.5741
Epoch 2/5, Loss: 0.1755
Epoch 3/5, Loss: 0.1368
Epoch 4/5, Loss: 0.1109
Epoch 5/5, Loss: 0.1120


In [10]:
# 儲存模型權重
torch.save(model.state_dict(), "rps_resnext_model.pth")
print("模型已儲存為 rps_resnext_model.pth")

模型已儲存為 rps_resnext_model.pth
