In [5]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from PIL import Image
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix, classification_report

# 从指定路径加载图片并将其转换为一维向量的函数
def load_image_from_path(img_path):
    """
    此函数用于打开指定路径的图片，将其转换为灰度图，调整大小为 64x64，然后将图片展平为一维向量

    参数:
    img_path (str): 图片的文件路径

    返回:
    np.array: 展平后的图片一维向量
    """
    img = Image.open(img_path)
    img = img.convert('L')  # 转换为灰度图
    img = img.resize((64, 64))  # 调整大小
    return np.array(img).flatten()  # 展平为一维向量

# 自定义数据集类
class ImageDataset(Dataset):
    def __init__(self, images, labels):
        self.images = torch.tensor(images, dtype=torch.float32)
        self.labels = torch.tensor(labels)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        return self.images[idx], self.labels[idx]

# 定义多层感知机模型
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.layer1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        return x

# 从指定文件夹加载图片及其对应的标签
def load_images(folder, prefix_to_label):
    """
    此函数用于从指定文件夹加载图片，并根据文件名前缀确定图片的标签

    参数:
    folder (str): 图片所在的文件夹路径
    prefix_to_label (dict): 文件名前缀与对应标签的映射字典

    返回:
    np.array, np.array: 分别为图片数据的数组和对应的标签数组
    """
    images = []
    labels = []
    for filename in os.listdir(folder):
        for prefix, label in prefix_to_label.items():
            if prefix in filename:
                img_path = os.path.join(folder, filename)
                img_flattened = load_image_from_path(img_path)
                images.append(img_flattened)
                labels.append(label)
                break
    return np.array(images), np.array(labels)

# 预测单张图片的函数
def predict_single_image(model, img_path):
    """
    此函数用于使用训练好的模型对单张图片进行分类预测

    参数:
    model (MLP): 训练好的多层感知机模型
    img_path (str): 待预测图片的文件路径

    返回:
    str: 预测的类别（猫或狗）
    """
    img_flattened = load_image_from_path(img_path)
    img_tensor = torch.tensor(img_flattened, dtype=torch.float32).unsqueeze(0)
    with torch.no_grad():
        output = model(img_tensor)
        _, predicted = torch.max(output, 1)
    if predicted.item() == 0:
        return 'cat'
    else:
        return 'dog'

# 主函数
def main():
    prefix_to_label = {'cat.': 0, 'dog.': 1} 
    all_images, all_labels = load_images('D:\\30877\\Documents\\archive\\PetImages', prefix_to_label)

    # 划分训练集和测试集
    train_images, test_images, train_labels, test_labels = train_test_split(all_images, all_labels, test_size=0.2, random_state=42)

    # 数据归一化
    train_images = train_images / 255.0
    test_images = test_images / 255.0

    # 转换标签为独热编码
    encoder = OneHotEncoder()
    train_labels_encoded = encoder.fit_transform(train_labels.reshape(-1, 1)).toarray()  # 将稀疏矩阵转换为密集矩阵（数组）
    test_labels_encoded = encoder.transform(test_labels.reshape(-1, 1)).toarray()  # 将稀疏矩阵转换为密集矩阵（数组）

    # 创建数据集和数据加载器
    train_dataset = ImageDataset(train_images, train_labels_encoded)
    test_dataset = ImageDataset(test_images, test_labels_encoded)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # 定义模型参数
    input_dim = train_images.shape[1]  
    hidden_dim = 128  
    output_dim = 2  

    model = MLP(input_dim, hidden_dim, output_dim)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # 训练模型
    epochs = 10
    for epoch in range(epochs):
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels.argmax(dim=1))
            loss.backward()
            optimizer.step()

        # 在测试集上评估
        with torch.no_grad():
            correct = 0
            total = 0
            predicted_labels = []
            true_labels = []
            for images, labels in test_loader:
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels.argmax(dim=1)).sum().item()
                predicted_labels.extend(predicted.numpy())
                true_labels.extend(labels.argmax(dim=1).numpy())

            accuracy = correct / total
            print(f'Accuracy: {accuracy:.4f}')

            # 混淆矩阵和分类报告
            conf_matrix = confusion_matrix(true_labels, predicted_labels)
            class_report = classification_report(true_labels, predicted_labels)
            print("混淆矩阵:\n", conf_matrix)
            print("分类报告:\n", class_report)

    # 进行单张图片预测
    test_img_path = 'D:\\30877\\Documents\\archive\\PetImages\\cat.00001.jpg'  
    predicted_class = predict_single_image(model, test_img_path)
    print(f'对单张图片的预测结果: {predicted_class}')

if __name__ == "__main__":
    main()

Accuracy: 0.5000
混淆矩阵:
 [[1 0]
 [1 0]]
分类报告:
               precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2

Accuracy: 0.5000
混淆矩阵:
 [[1 0]
 [1 0]]
分类报告:
               precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2

Accuracy: 0.5000
混淆矩阵:
 [[1 0]
 [1 0]]
分类报告:
               precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       0.00      0.00      0.00         1

    accuracy                           0.50 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from PIL import Image
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix, classification_report
from torch.optim.lr_scheduler import ReduceLROnPlateau
import random
from PIL import ImageOps

# 从指定路径加载图片并将其转换为一维向量的函数
def load_image_from_path(img_path):
    """
    此函数用于打开指定路径的图片，将其转换为灰度图，调整大小为 64x64，然后将图片展平为一维向量

    参数:
    img_path (str): 图片的文件路径

    返回:
    np.array: 展平后的图片一维向量
    """
    img = Image.open(img_path)
    img = img.convert('L')  # 转换为灰度图
    img = img.resize((64, 64))  # 调整大小
    return np.array(img).flatten()  # 展平为一维向量

# 从指定路径加载图片并进行数据增强
def load_image_and_augment(img_path):
    img = Image.open(img_path)
    img = img.convert('L')  
    img = img.resize((64, 64))  

    augmented_images = [img]

    # 随机旋转
    rotation_angle = random.randint(-30, 30)  
    augmented_images.append(img.rotate(rotation_angle))

    # 水平翻转
    augmented_images.append(ImageOps.mirror(img))

    # 垂直翻转
    augmented_images.append(ImageOps.flip(img))

    return [np.array(image).flatten() for image in augmented_images]

# 自定义数据集类
class ImageDataset(Dataset):
    def __init__(self, images, labels):
        self.images = torch.tensor(images, dtype=torch.float32)
        self.labels = torch.tensor(labels)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        return self.images[idx], self.labels[idx]

# 定义多层感知机模型
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.layer1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.layer2 = nn.Linear(hidden_dim, hidden_dim * 2)  
        self.relu2 = nn.ReLU()
        self.layer3 = nn.Linear(hidden_dim * 2, output_dim)

    def forward(self, x):
        x = self.layer1(x)
        x = self.relu1(x)
        x = self.layer2(x)
        x = self.relu2(x)
        x = self.layer3(x)
        return x

# 从指定文件夹加载图片及其对应的标签
def load_images(folder, prefix_to_label):
    """
    此函数用于从指定文件夹加载图片，并根据文件名前缀确定图片的标签

    参数:
    folder (str): 图片所在的文件夹路径
    prefix_to_label (dict): 文件名前缀与对应标签的映射字典

    返回:
    np.array, np.array: 分别为图片数据的数组和对应的标签数组
    """
    images = []
    labels = []
    for filename in os.listdir(folder):
        for prefix, label in prefix_to_label.items():
            if prefix in filename:
                img_path = os.path.join(folder, filename)
                augmented_images = load_image_and_augment(img_path)
                images.extend(augmented_images)
                labels.extend([label] * len(augmented_images))
                break
    return np.array(images), np.array(labels)

# 预测单张图片的函数
def predict_single_image(model, img_path):
    """
    此函数用于使用训练好的模型对单张图片进行分类预测

    参数:
    model (MLP): 训练好的多层感知机模型
    img_path (str): 待预测图片的文件路径

    返回:
    str: 预测的类别（猫或狗）
    """
    img_flattened = load_image_from_path(img_path)
    img_tensor = torch.tensor(img_flattened, dtype=torch.float32).unsqueeze(0)
    with torch.no_grad():
        output = model(img_tensor)
        _, predicted = torch.max(output, 1)
    if predicted.item() == 0:
        return 'cat'
    else:
        return 'dog'

# 主函数
def main():
    prefix_to_label = {'cat.': 0, 'dog.': 1} 
    all_images, all_labels = load_images('D:\\30877\\Documents\\archive\\PetImages', prefix_to_label)

    # 划分训练集和测试集
    train_images, test_images, train_labels, test_labels = train_test_split(all_images, all_labels, test_size=0.2, random_state=42)

    # 数据归一化
    train_images = train_images / 255.0
    test_images = test_images / 255.0

    # 转换标签为独热编码
    encoder = OneHotEncoder()
    train_labels_encoded = encoder.fit_transform(train_labels.reshape(-1, 1)).toarray()  # 将稀疏矩阵转换为密集矩阵（数组）
    test_labels_encoded = encoder.transform(test_labels.reshape(-1, 1)).toarray()  # 将稀疏矩阵转换为密集矩阵（数组）

    # 创建数据集和数据加载器
    train_dataset = ImageDataset(train_images, train_labels_encoded)
    test_dataset = ImageDataset(test_images, test_labels_encoded)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # 定义模型参数
    input_dim = train_images.shape[1]  
    hidden_dim = 128  
    output_dim = 2  

    model = MLP(input_dim, hidden_dim, output_dim)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    scheduler = ReduceLROnPlateau(optimizer,'min', patience = 3, factor = 0.1)

    # 训练模型
    epochs = 10
    for epoch in range(epochs):
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels.argmax(dim=1))
            loss.backward()
            optimizer.step()

        # 在测试集上评估
        with torch.no_grad():
            correct = 0
            total = 0
            predicted_labels = []
            true_labels = []
            for images, labels in test_loader:
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels.argmax(dim=1)).sum().item()
                predicted_labels.extend(predicted.numpy())
                true_labels.extend(labels.argmax(dim=1).numpy())

            accuracy = correct / total
            print(f'Epoch {epoch + 1}, Accuracy: {accuracy:.4f}')

            # 混淆矩阵和分类报告
            conf_matrix = confusion_matrix(true_labels, predicted_labels)
            class_report = classification_report(true_labels, predicted_labels)
            print("混淆矩阵:\n", conf_matrix)
            print("分类报告:\n", class_report)

        scheduler.step(loss)

    # 进行单张图片预测
    test_img_path = 'D:\\30877\\Documents\\archive\\PetImages\\cat.00001.jpg'  
    predicted_class = predict_single_image(model, test_img_path)
    print(f'对单张图片的预测结果: {predicted_class}')

if __name__ == "__main__":
    main()

Epoch 1, Accuracy: 0.4286
混淆矩阵:
 [[3 0]
 [4 0]]
分类报告:
               precision    recall  f1-score   support

           0       0.43      1.00      0.60         3
           1       0.00      0.00      0.00         4

    accuracy                           0.43         7
   macro avg       0.21      0.50      0.30         7
weighted avg       0.18      0.43      0.26         7

Epoch 2, Accuracy: 0.5714
混淆矩阵:
 [[0 3]
 [0 4]]
分类报告:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.57      1.00      0.73         4

    accuracy                           0.57         7
   macro avg       0.29      0.50      0.36         7
weighted avg       0.33      0.57      0.42         7

Epoch 3, Accuracy: 0.5714
混淆矩阵:
 [[3 0]
 [3 1]]
分类报告:
               precision    recall  f1-score   support

           0       0.50      1.00      0.67         3
           1       1.00      0.25      0.40         4

    accuracy     

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
