<a href="https://colab.research.google.com/github/Qiaochu-Zhang/C-_Grades_with_Hash_Table/blob/main/basic_CNN1023.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


In [25]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [26]:
!pip install torch torchvision
!pip install opencv-python




In [29]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, Subset
import cv2
import os
from PIL import Image
import numpy as np
from sklearn.model_selection import KFold, train_test_split
import random

# 检查是否有GPU可用
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 设置随机种子
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# 自定义数据集类，只检查 .jpg 和 .jpeg 文件
class NoiseDataset(Dataset):
    def __init__(self, image_folder, label):
        self.image_paths = [
            os.path.join(image_folder, img)
            for img in os.listdir(image_folder)
            if img.lower().endswith(('.jpg', '.jpeg'))
        ]
        # 过滤无法读取的图片路径
        self.image_paths = [path for path in self.image_paths if cv2.imread(path) is not None]
        self.labels = [label] * len(self.image_paths)
        self.transform = transforms.Compose([
            transforms.Resize((64, 64)),
            transforms.ToTensor()
        ])

    def extract_noise(self, image_path):
        """提取图片的噪声特征"""
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        blurred = cv2.GaussianBlur(img, (5, 5), 0)
        noise = cv2.absdiff(img, blurred)
        return Image.fromarray(noise)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        noise_image = self.extract_noise(self.image_paths[idx])
        img_tensor = self.transform(noise_image)
        label = self.labels[idx]
        return img_tensor, label

# 定义数据集路径
real_images_path = '/content/drive/MyDrive/Real'
ai_images_path = '/content/drive/MyDrive/fake'

# 加载数据集
real_dataset = NoiseDataset(real_images_path, label=0)
ai_dataset = NoiseDataset(ai_images_path, label=1)

# 合并数据集
full_dataset = real_dataset + ai_dataset

# 打乱数据集并划分测试集（10%）
test_size = 0.1
labels = [label for _, label in full_dataset]
train_val_indices, test_indices = train_test_split(
    range(len(full_dataset)), test_size=test_size, stratify=labels, random_state=42
)

train_val_dataset = Subset(full_dataset, train_val_indices)
test_dataset = Subset(full_dataset, test_indices)

print(f"Training+Validation set size: {len(train_val_dataset)}")
print(f"Test set size: {len(test_dataset)}")

# 定义K折交叉验证
k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)

# 定义 CNN 模型
class NoiseCNN(nn.Module):
    def __init__(self):
        super(NoiseCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.fc1 = nn.Linear(32 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 32 * 16 * 16)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# 保存最佳模型的权重
best_model_wts = None
best_acc = 0.0

# 开始交叉验证
for fold, (train_ids, val_ids) in enumerate(kfold.split(train_val_indices)):
    print(f'\nFold {fold + 1}')
    print('--------------------------------')

    # 定义训练和验证数据集
    train_loader = DataLoader(
        Subset(train_val_dataset, train_ids), batch_size=8, shuffle=True
    )
    val_loader = DataLoader(
        Subset(train_val_dataset, val_ids), batch_size=8, shuffle=False
    )

    # 初始化模型、损失函数和优化器
    model = NoiseCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # 训练模型
    for epoch in range(5):
        model.train()
        running_loss = 0.0

        for data, target in train_loader:
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        avg_loss = running_loss / len(train_loader)
        print(f'Epoch [{epoch + 1}/5], Loss: {avg_loss:.4f}')

    # 验证模型
    model.eval()
    correct, total = 0, 0

    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    accuracy = 100 * correct / total
    print(f'Fold {fold + 1} Accuracy: {accuracy:.2f}%')

    # 保存表现最佳的模型
    if accuracy > best_acc:
        best_acc = accuracy
        best_model_wts = model.state_dict()

# 加载最佳模型的权重
model.load_state_dict(best_model_wts)

# 在测试集上评估模型
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)
model.eval()
correct, total = 0, 0

with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

test_accuracy = 100 * correct / total
print(f'\nTest Accuracy: {test_accuracy:.2f}%')


Using device: cuda
Training+Validation set size: 268
Test set size: 30

Fold 1
--------------------------------
Epoch [1/5], Loss: 0.7080
Epoch [2/5], Loss: 0.6940
Epoch [3/5], Loss: 0.6911
Epoch [4/5], Loss: 0.6902
Epoch [5/5], Loss: 0.6801
Fold 1 Accuracy: 64.81%

Fold 2
--------------------------------
Epoch [1/5], Loss: 0.6985
Epoch [2/5], Loss: 0.6945
Epoch [3/5], Loss: 0.6925
Epoch [4/5], Loss: 0.6928
Epoch [5/5], Loss: 0.6927
Fold 2 Accuracy: 42.59%

Fold 3
--------------------------------
Epoch [1/5], Loss: 0.7064
Epoch [2/5], Loss: 0.6935
Epoch [3/5], Loss: 0.6949
Epoch [4/5], Loss: 0.6916
Epoch [5/5], Loss: 0.6981
Fold 3 Accuracy: 62.96%

Fold 4
--------------------------------
Epoch [1/5], Loss: 0.7000
Epoch [2/5], Loss: 0.6988
Epoch [3/5], Loss: 0.6938
Epoch [4/5], Loss: 0.6920
Epoch [5/5], Loss: 0.6621
Fold 4 Accuracy: 62.26%

Fold 5
--------------------------------
Epoch [1/5], Loss: 0.7008
Epoch [2/5], Loss: 0.6952
Epoch [3/5], Loss: 0.6928
Epoch [4/5], Loss: 0.6795
Epoc