In [None]:
import os
import cv2
import pandas as pd
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch

In [18]:

class CustomImageDataset(Dataset):
    """Dataset pairing sequentially numbered JPEG images with integer labels.

    The label file holds one integer per line. The label on the N-th non-blank
    line (1-based) belongs to the image ``{N:06d}.jpg`` inside ``img_dir``.

    Args:
        txt_file: Path to the text file with one integer label per line.
        img_dir: Directory containing ``000001.jpg``, ``000002.jpg``, ...
        transform: Optional callable applied to the RGB image before it is
            returned.
    """

    def __init__(self, txt_file, img_dir, transform=None):
        self.img_dir = img_dir
        self.labels = self._load_labels(txt_file)  # load labels from the TXT file
        self.transform = transform

    def _load_labels(self, txt_file):
        """Read one integer label per line, ignoring blank lines.

        Raises:
            ValueError: If a non-blank line is not a valid integer; the message
                names the file and the offending line number.
        """
        labels = []
        with open(txt_file, 'r') as f:
            # Iterate the file lazily instead of materialising readlines().
            for line_no, line in enumerate(f, start=1):
                text = line.strip()
                if not text:
                    # Tolerate blank lines (e.g. a trailing newline added by hand).
                    continue
                try:
                    labels.append(int(text))
                except ValueError as exc:
                    raise ValueError(
                        f'{txt_file}: line {line_no} is not an integer label: {text!r}'
                    ) from exc
        return labels

    def __len__(self):
        """Number of samples, i.e. number of labels that were loaded."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        count = len(self.labels)
        if idx < 0:
            # Support Python-style negative indexing so that image and label
            # still refer to the same sample.
            idx += count
        if not 0 <= idx < count:
            # Raising IndexError *before* touching the disk lets plain
            # `for sample in dataset` iteration terminate normally.
            raise IndexError(f'index {idx} is out of range for dataset of size {count}')

        # Images are named 000001.jpg, 000002.jpg, ... (six digits, 1-based).
        img_name = os.path.join(self.img_dir, f'{idx + 1:06d}.jpg')
        image = cv2.imread(img_name)  # load the image with OpenCV (BGR order)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the path rather than inside cvtColor.
            raise FileNotFoundError(f'Could not read image file: {img_name}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # convert BGR to RGB
        label = self.labels[idx]

        if self.transform is not None:
            image = self.transform(image)

        return image, label


In [58]:
# Extract the gender labels (per the original author's note) from the raw
# whitespace-separated annotation file.
# NOTE(review): on_bad_lines='warn' drops malformed rows with only a warning.
# Labels are later matched to images purely by row order, so any dropped row
# would shift every following label - confirm no warnings are emitted here.
df = pd.read_csv('origindata/Lables.txt', sep=r'\s+', header=None, skiprows=1, on_bad_lines='warn')

# With header=None the columns are labelled 0, 1, 2, ...; take the one labelled 20.
specified_column = df[20]

# Save the extracted column as a new TXT file (one value per line, no index or
# header). Create the output directory first so a fresh checkout does not fail.
os.makedirs('dataset', exist_ok=True)
specified_column.to_csv('dataset/sexlable.txt', index=False, header=False)

In [60]:
# Preprocessing pipeline: convert each image to a tensor, then normalise all
# three channels with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Images live in the 'ImageData' directory; labels come from the extracted TXT file.
dataset = CustomImageDataset('dataset/sexlable.txt', 'ImageData', transform)

# Data loader yielding shuffled mini-batches of 32 samples.
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

In [12]:
# Define the model to train
class SimpleCNN(torch.nn.Module):
    """Two-block CNN producing one sigmoid probability per image.

    Architecture: (3x3 conv -> ReLU -> 2x2 max-pool) twice, then two fully
    connected layers ending in a single sigmoid unit.

    Args:
        input_size: ``(height, width)`` of the input images. The default
            ``(218, 178)`` gives the same 64 * 54 * 44 flattened feature size
            the network was originally hard-coded for.
    """

    def __init__(self, input_size=(218, 178)):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)

        # The 3x3/padding-1 convolutions keep the spatial size; each of the two
        # 2x2/stride-2 pools halves it (rounding down), i.e. size // 4 overall.
        height, width = input_size
        self.flat_features = 64 * (height // 4) * (width // 4)
        self.fc1 = torch.nn.Linear(self.flat_features, 512)
        self.fc2 = torch.nn.Linear(512, 1)

    def forward(self, x):
        """Map images of shape ``(N, 3, H, W)`` to probabilities of shape ``(N, 1)``.

        A single unbatched ``(3, H, W)`` image is treated as a batch of one.

        Raises:
            ValueError: If the spatial size of ``x`` does not match the
                ``input_size`` the model was built for.
        """
        if x.dim() == 3:
            # Keep accepting a single unbatched image (returns shape (1, 1)).
            x = x.unsqueeze(0)
        x = self.pool(torch.nn.functional.relu(self.conv1(x)))
        x = self.pool(torch.nn.functional.relu(self.conv2(x)))
        # Flatten per sample. view(-1, features) could silently fold a wrong
        # spatial size into the batch dimension; this keeps the batch size fixed.
        x = torch.flatten(x, start_dim=1)
        if x.size(1) != self.flat_features:
            raise ValueError(
                f'expected {self.flat_features} features per sample after the '
                f'conv/pool stack, got {x.size(1)}; check the input image size'
            )
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

In [13]:
# Set up the model, the loss function and the optimizer
model = SimpleCNN()
# Binary cross-entropy on probabilities (the model already applies a sigmoid)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    # Per-epoch accumulators: summed batch loss, correct predictions, samples seen.
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        # Cast labels to float and reshape to a column so they line up with
        # the (batch, 1) model output.
        labels = labels.float().reshape(-1, 1)

        # Forward pass and loss
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update (clear stale gradients first)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Bookkeeping: threshold the probabilities at 0.5 to get hard predictions
        running_loss += loss.item()
        predicted = outputs.gt(0.5).float()
        correct += predicted.eq(labels).sum().item()
        total += labels.shape[0]

    train_accuracy = correct / total
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}, Train Accuracy: {train_accuracy:.4f}')

D:\jupyter
