In [1]:
import torch
import torch.nn as nn
import cv2 as cv
import numpy as np

# PyTorch定义模型架构
class CustomCNN(nn.Module):
    def __init__(self):
        super(CustomCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3) # 3通道输入，32个3x3卷积核
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2)
        self.drop1 = nn.Dropout(0.25)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3) # 第二个卷积层，64个3x3卷积核
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2)
        self.drop2 = nn.Dropout(0.25)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3) # 128个3x3卷积核
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(2)
        self.drop3 = nn.Dropout(0.25)

        self.fc1 = nn.Linear(128 * 14 * 14, 512) # 全连接层
        self.bn4 = nn.BatchNorm1d(512)
        self.drop4 = nn.Dropout(0.5)

        self.fc2 = nn.Linear(512, 1) # 输出层

    def forward(self, x):
        x = self.drop1(self.pool1(self.bn1(torch.relu(self.conv1(x))))) # 卷积 -> 批标准化 -> 池化 -> dropout
        x = self.drop2(self.pool2(self.bn2(torch.relu(self.conv2(x)))))
        x = self.drop3(self.pool3(self.bn3(torch.relu(self.conv3(x)))))
        x = x.view(-1, 128 * 14 * 14) # 全连接的一维展开罢
        x = self.drop4(self.bn4(torch.relu(self.fc1(x)))) # 全连接层 -> 批标准化 -> dropout
        x = torch.sigmoid(self.fc2(x)) # 甚至线性整流
        return x

In [13]:
# 加载模型信息
model = CustomCNN()
torch.save(model.state_dict(), 'model.pth')
model.load_state_dict(torch.load('model.pth'))
model.eval()

CustomCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop1): Dropout(p=0.25, inplace=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop2): Dropout(p=0.25, inplace=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop3): Dropout(p=0.25, inplace=False)
  (fc1): Linear(in_features=25088, out_features=512, bias=True)
  (bn_fc1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tru

In [14]:
# 图像预处理
def preprocess(x):
    x = cv.resize(x, (128, 128))
    x = np.transpose(x, (2, 0, 1)) / 255.0
    x = np.expand_dims(x, axis=0)
    x = torch.tensor(x, dtype=torch.float32)
    return x

# 读取并预处理图像
image = cv.imread('images/dog2.jpg')
img_resize = preprocess(image[:, :, ::-1])
out = model(img_resize)
class_id = int(out.item() > 0.5)
class_names = ['猫', '狗'] # 自觉调编码
class_name = class_names[class_id]

# 在图像上显示预测结果
cv.putText(image, class_name, (0, 20), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), thickness=2)
cv.imshow('预测', image)
cv.imwrite('predict.png', image)
cv.waitKey(0)

-1

In [6]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from sklearn.model_selection import train_test_split

# 确保在GPU上进行训练（如果有可用的话）
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 定义卷积神经网络模型
class CustomCNN(nn.Module):
    def __init__(self):
        super(CustomCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2)
        self.drop1 = nn.Dropout(0.25)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2)
        self.drop2 = nn.Dropout(0.25)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(2)
        self.drop3 = nn.Dropout(0.25)

        self.fc1 = nn.Linear(128 * 14 * 14, 512)
        self.bn_fc1 = nn.BatchNorm1d(512)
        self.drop_fc1 = nn.Dropout(0.5)

        self.fc2 = nn.Linear(512, 1)

    def forward(self, x):
        x = self.drop1(self.pool1(self.bn1(nn.ReLU()(self.conv1(x)))))
        x = self.drop2(self.pool2(self.bn2(nn.ReLU()(self.conv2(x)))))
        x = self.drop3(self.pool3(self.bn3(nn.ReLU()(self.conv3(x)))))
        x = x.view(-1, 128 * 14 * 14)
        x = self.drop_fc1(self.bn_fc1(nn.ReLU()(self.fc1(x))))
        x = self.fc2(x)
        return x

# 创建自定义数据集
class CustomDataset(Dataset):
    def __init__(self, df, root_dir, transform=None):
        self.df = df
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.df.iloc[idx, 0])
        image = Image.open(img_name)
        label = torch.tensor(self.df.iloc[idx, 1], dtype=torch.float32)

        if self.transform:
            image = self.transform(image)

        return image, label

# 设定图像预处理转换
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# 获取训练数据集文件名
filenames = os.listdir("./input/train")
categories = []

for filename in filenames:
    category = filename.split('.')[0]
    if category == 'dog':
        categories.append(1)
    else:
        categories.append(0)

df = pd.DataFrame({
    'filename': filenames,
    'category': categories
})

# 加载数据
train_df, validate_df = train_test_split(df, test_size=0.20, random_state=42)
train_data = CustomDataset(train_df, "./input/train/", transform=transform)
train_loader = DataLoader(train_data, batch_size=15, shuffle=True)

validate_data = CustomDataset(validate_df, "./input/train/", transform=transform)
validate_loader = DataLoader(validate_data, batch_size=15, shuffle=False)

# 初始化模型和优化器
model = CustomCNN().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.RMSprop(model.parameters())

# 训练模型
FAST_RUN = False
epochs = 3 if FAST_RUN else 50
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels.unsqueeze(1))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}")

# 保存模型
torch.save(model.state_dict(), "Mymodel.pth")


Epoch 1, Loss: 1.249635543778678
Epoch 2, Loss: 0.8421239944818978
Epoch 3, Loss: 0.768222399543379
Epoch 4, Loss: 0.7208112755389972
Epoch 5, Loss: 0.6644473201899885
Epoch 6, Loss: 0.6543850283199382
Epoch 7, Loss: 0.6196669248219963
Epoch 8, Loss: 0.595413797230364
Epoch 9, Loss: 0.575311314279788
Epoch 10, Loss: 0.5494419381579506
Epoch 11, Loss: 0.5329742747768064
Epoch 12, Loss: 0.5131324593708894
Epoch 13, Loss: 0.46731138375596465
Epoch 14, Loss: 0.43597401420925264
Epoch 15, Loss: 0.4266467717624156
Epoch 16, Loss: 0.37594242919689025
Epoch 17, Loss: 0.3681285474275317
Epoch 18, Loss: 0.3617083544137879
Epoch 19, Loss: 0.3162472882247138
Epoch 20, Loss: 0.2781508854877587
Epoch 21, Loss: 0.2812399364590088
Epoch 22, Loss: 0.2422544777950394
Epoch 23, Loss: 0.24435442951986583
Epoch 24, Loss: 0.24899856684833904
Epoch 25, Loss: 0.2185312349928073
Epoch 26, Loss: 0.22533284892136596
Epoch 27, Loss: 0.23370025057517532
Epoch 28, Loss: 0.19055231884187174
Epoch 29, Loss: 0.1952386

In [15]:
model.load_state_dict(torch.load('Mymodel.pth'))

<All keys matched successfully>

In [17]:
def preprocess(x):
    x = cv.resize(x, (128, 128))
    x = np.transpose(x, (2, 0, 1)) / 255.0
    x = np.expand_dims(x, axis=0)
    x = torch.tensor(x, dtype=torch.float32)
    return x

# 读取并预处理图像
image = cv.imread('images/dog2.jpg')
img_resize = preprocess(image[:, :, ::-1])
out = model(img_resize)
class_id = int(out.item() > 0.5)
class_names = ['cat', 'dog']
class_name = class_names[class_id]

# 在图像上显示预测结果
cv.putText(image, class_name, (0, 20), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), thickness=2)
cv.imshow('预测', image)
cv.imwrite('predict.png', image)
cv.waitKey(0)

-1