## 尝试1：利用face_recognition分类

In [None]:
import face_recognition
import os
import csv

# Folder holding the images to label, and the CSV file the labels are written to.
folder_path = "./data"  # replace with the path to your image folder
output_csv = "image_labels.csv"  # output CSV file

# Rows of [filename, label], one per image found in the folder.
labels = []

# Visit the images in a stable, case-insensitive order: os.listdir() returns
# entries in arbitrary order, which would make the CSV differ between runs.
for filename in sorted(os.listdir(folder_path), key=str.lower):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue
    image_path = os.path.join(folder_path, filename)
    # Load the image
    image = face_recognition.load_image_file(image_path)
    # Detect faces
    face_locations = face_recognition.face_locations(image)
    # Label: 1 means at least one face was found, 0 means none
    label = 1 if len(face_locations) > 0 else 0
    labels.append([filename, label])

# Write the labels to the CSV file. The encoding is pinned to UTF-8 so that
# non-ASCII filenames are written the same way on every platform instead of
# depending on the locale's default encoding.
with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Image', 'Label'])  # header row
    writer.writerows(labels)

print(f"标签已保存到 {output_csv}")

## 尝试2：生成脚本，自己贴标签

In [9]:
import os
import csv

# Folder holding the images, and the CSV file that will list them.
folder_path = "./data"
output_csv = "image_list-2.csv"  # output CSV file

# One [filename, "1"] row per image in the folder. Every image starts with the
# label "1"; presumably the labels are then corrected by hand in the CSV (the
# section heading says the labelling is manual) -- confirm.
image_list = [
    [filename, "1"]
    for filename in os.listdir(folder_path)
    if filename.lower().endswith(('.png', '.jpg', '.jpeg'))
]

# Sort by filename, ignoring case
image_list.sort(key=lambda x: x[0].lower())

# Write the filenames to the CSV file. The encoding is pinned to UTF-8 so that
# non-ASCII filenames do not depend on the platform's default encoding and the
# file matches the UTF-8 default that pandas.read_csv uses when reading it back.
with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Image', 'Label'])  # header row
    writer.writerows(image_list)

print(f"图片文件名已保存到 {output_csv}")

图片文件名已保存到 image_list-2.csv


## 加载数据

In [1]:
import pandas as pd
from datasets import Dataset
from PIL import Image
import os

# Image folder and the CSV file that lists each image with its label
# (replace both with your own paths).
image_folder, csv_file = "data", "image_list-2.csv"

# Parse the CSV into a DataFrame
df = pd.read_csv(csv_file)

# Row-level loader used to build the dataset
def load_image(row):
    """Open the image named in ``row['Image']`` and pair it with ``row['Label']``.

    The file is looked up inside ``image_folder`` and converted to RGB.

    Returns:
        dict with the keys ``"image"`` (the RGB PIL image) and ``"label"``.
    """
    rgb_image = Image.open(os.path.join(image_folder, row['Image'])).convert("RGB")
    return {"image": rgb_image, "label": row['Label']}

# Attach the loaded image and its label to every row of the CSV.
dataset = Dataset.from_pandas(df).map(load_image)

# Hold out 20% of the rows for evaluation. The seed is fixed so that the split
# (and therefore the reported validation accuracy) is reproducible after a
# kernel restart instead of changing on every run.
train_test_split = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

Map:   0%|          | 0/972 [00:00<?, ? examples/s]

In [6]:
import torch
from transformers import AutoModel, AutoProcessor
from torch import nn

# Load the pretrained checkpoint and its matching processor.
ckpt = "google/siglip2-so400m-patch16-384"
# No device_map="auto" here: the notebook places the wrapped model explicitly
# with `.to(device)` afterwards. Letting accelerate dispatch the weights first
# conflicts with that move when the model is split across devices or offloaded
# (the dispatch hooks stay attached while the weights are moved underneath them).
model = AutoModel.from_pretrained(ckpt).eval()
processor = AutoProcessor.from_pretrained(ckpt)

OSError: We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.
Check your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

In [None]:
class BinaryClassifier(nn.Module):
    """Two-class linear head on top of a frozen image encoder.

    The base model is only used as a feature extractor: its embeddings are
    computed under ``torch.no_grad()``, so gradients reach the linear head only.

    Args:
        base_model: model that produces image embeddings. Models exposing
            ``get_image_features(pixel_values=...)`` are queried through it;
            otherwise the model is called with ``pixel_values`` and its
            ``image_embeds`` output is used.
        embedding_dim: width of the image embedding fed to the head.
            The default of 1152 presumably matches the SigLIP checkpoint loaded
            in this notebook -- verify when switching checkpoints.
    """

    def __init__(self, base_model, embedding_dim=1152):
        super().__init__()
        self.base_model = base_model
        self.classifier = nn.Linear(embedding_dim, 2)

    def _embed(self, pixel_values):
        """Return the image embeddings of ``pixel_values`` from the base model."""
        get_features = getattr(self.base_model, "get_image_features", None)
        if get_features is None:
            # Base model without an image-only entry point: call it directly.
            return self.base_model(pixel_values=pixel_values).image_embeds
        # Use the image-only path. The full forward pass of a dual-encoder
        # model such as SigLIP also runs the text tower and fails when no
        # input_ids are supplied.
        features = get_features(pixel_values=pixel_values)
        if torch.is_tensor(features):
            return features
        # Some library versions wrap the embedding in an output object.
        return features.pooler_output

    def forward(self, pixel_values=None):
        """Compute the class logits for a batch of preprocessed images.

        Args:
            pixel_values: tensor of preprocessed image pixel values, as
                produced by the processor.

        Returns:
            Tensor of logits with one row per image and two columns.

        Raises:
            ValueError: if ``pixel_values`` is not provided.
        """
        if pixel_values is None:
            raise ValueError("pixel_values must be a tensor of preprocessed images")
        # Extract image features without tracking gradients (frozen backbone)
        with torch.no_grad():
            image_embeddings = self._embed(pixel_values)
        # Classification head
        logits = self.classifier(image_embeddings)
        return logits

# Wrap the pretrained model with the classification head and place it on the CPU.
device = torch.device("cpu")
classifier_model = BinaryClassifier(model)
classifier_model = classifier_model.to(device)

OSError: We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.
Check your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

In [7]:
def preprocess(batch):
    """Add model-ready tensors to a batch of examples.

    Runs the images in ``batch["image"]`` through ``processor`` and stores the
    resulting ``pixel_values`` in the batch, together with ``labels`` built
    from ``batch["label"]`` as a ``torch.long`` tensor.

    Returns:
        The same ``batch`` object, with ``"pixel_values"`` and ``"labels"`` added.
    """
    encoded = processor(images=list(batch["image"]), return_tensors="pt", padding=True)
    batch["pixel_values"] = encoded["pixel_values"]
    batch["labels"] = torch.tensor(batch["label"], dtype=torch.long)
    return batch

# Apply the preprocessing to both splits, dropping the columns that are no
# longer needed once the tensors exist.
_columns_to_drop = ["image", "Image", "Label"]
train_dataset = train_dataset.map(preprocess, batched=True, remove_columns=_columns_to_drop)
eval_dataset = eval_dataset.map(preprocess, batched=True, remove_columns=_columns_to_drop)

# Set the dataset format: expose only the model inputs, as torch tensors
for _split in (train_dataset, eval_dataset):
    _split.set_format("torch", columns=["pixel_values", "labels"])

Map:   0%|          | 0/777 [00:00<?, ? examples/s]

NameError: name 'processor' is not defined

## 训练模型

In [None]:
from torch.utils.data import DataLoader
# torch's AdamW replaces `transformers.AdamW`, which is deprecated and no longer
# available in recent transformers releases.
from torch.optim import AdamW
from tqdm import tqdm

# Create the DataLoaders and set the batch size
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
eval_loader = DataLoader(eval_dataset, batch_size=16)

# Optimizer and loss function.
# Only the linear head is optimised: the base model runs under torch.no_grad()
# inside BinaryClassifier.forward, so its parameters never receive gradients.
# eps and weight_decay are set to the defaults of the former transformers
# AdamW so the optimisation behaves as before.
optimizer = AdamW(
    classifier_model.classifier.parameters(),
    lr=5e-5,
    eps=1e-6,
    weight_decay=0.0,
)
criterion = nn.CrossEntropyLoss()

# Training loop
num_epochs = 5
classifier_model.train()
# The backbone is a frozen feature extractor: keep it in eval mode so layers
# such as dropout (if it has any) stay deterministic during training.
classifier_model.base_model.eval()
for epoch in range(num_epochs):
    total_loss = 0
    for batch in tqdm(train_loader):
        pixel_values = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        # Forward pass
        outputs = classifier_model(pixel_values)
        loss = criterion(outputs, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean loss over the batches of this epoch
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")

# Evaluate the model: count how many held-out examples are classified correctly
classifier_model.eval()
correct, total = 0, 0
with torch.no_grad():
    for batch in eval_loader:
        pixel_values, labels = (batch[key].to(device) for key in ("pixel_values", "labels"))
        outputs = classifier_model(pixel_values)
        # Index of the largest logit in each row is the predicted class
        predicted = outputs.max(dim=1).indices
        total += labels.shape[0]
        correct += int(predicted.eq(labels).sum())

print(f"Validation Accuracy: {correct / total * 100:.2f}%")

In [None]:
# Save the model: write the state dict of the whole classifier to disk
trained_weights = classifier_model.state_dict()
torch.save(trained_weights, "binary_classifier.pth")

## 加载模型进行推理

In [None]:
# Load the saved weights back into the classifier.
# map_location places the tensors on `device` even if the checkpoint was
# written from a different device; weights_only=True restricts unpickling to
# tensors and plain containers, so a tampered checkpoint file cannot execute
# arbitrary code when it is loaded.
state_dict = torch.load("binary_classifier.pth", map_location=device, weights_only=True)
classifier_model.load_state_dict(state_dict)
classifier_model.eval()

# Inference example
def predict_image(image_path):
    """Classify a single image file.

    The image is opened, converted to RGB, run through ``processor`` and then
    through ``classifier_model`` without tracking gradients.

    Returns:
        Tuple ``(predicted_label, probability)``: the index of the class with
        the highest softmax probability, and that probability as a float.
    """
    rgb_image = Image.open(image_path).convert("RGB")
    model_inputs = processor(images=[rgb_image], return_tensors="pt").to(device)
    with torch.no_grad():
        class_probs = torch.softmax(classifier_model(model_inputs["pixel_values"]), dim=1)
        best_class = torch.argmax(class_probs, dim=1).item()
    return best_class, class_probs[0][best_class].item()

# Try it on a single image
image_path = "path/to/test/image.jpg"
label, confidence = predict_image(image_path)
print(f"Predicted Label: {label}, Confidence: {confidence:.4f}")