In [1]:
import os
if not os.path.exists("./save_model_rs_dataset"):
    os.mkdir("./save_model_rs_dataset")


In [2]:
import torch
from torch import nn

# class MyNet(nn.Module):

#     def __init__(self, num_classes=10) -> None:
#         super().__init__()
#         self.model = nn.Sequential(
#             nn.Conv2d(3, 32, 5, padding=2),
#             nn.MaxPool2d(2),
#             nn.Conv2d(32, 32, 5, padding=2),
#             nn.MaxPool2d(2),
#             nn.Conv2d(32, 64, 5, padding=2),
#             nn.MaxPool2d(2),
#             nn.Flatten(),
#             nn.Linear(1024, 64),
#             nn.Linear(64, class_nums),
#             nn.Softmax(dim=1)
#         )

#     def forward(self, x):
#         x = self.model(x)
#         return x


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import torchvision.datasets
import numpy as np
from torchvision import datasets
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.optim import lr_scheduler
from torchvision.transforms import transforms


data_transform = transforms.Compose([
    transforms.Resize([299,299]),    # 处理图像大小
    transforms.RandomHorizontalFlip(),  # 随机水平翻转
    transforms.RandomVerticalFlip(),    # 随机垂直翻转
    transforms.RandomRotation(45),       # 随机旋转
    transforms.ColorJitter(brightness=0.1, contrast=(0.75, 1.5), saturation=(0.75, 1.5), hue=0.15),  # 随机颜色变换
    transforms.ToTensor()     # 仅对数据做转换为 tensor 格式操作
])
data_transform_test = transforms.Compose([
    transforms.Resize([299,299]),    # 处理图像大小
    transforms.ToTensor()     # 仅对数据做转换为 tensor 格式操作
])

# 每次取多少张图象进行训练
Batch_size = 16

# 使用自己的数据集
train_dataset = datasets.ImageFolder(r"C:\Users\lhc25\Desktop\DR\classified_images_train",transform=data_transform)
# 使用官方数据集
# train_dataset = torchvision.datasets.CIFAR10("dataset", train=True, transform=data_transform, download=True)
train_dataloader = DataLoader(dataset=train_dataset,batch_size=Batch_size,shuffle=True,num_workers=2)

test_dataset = datasets.ImageFolder(r"C:\Users\lhc25\Desktop\DR\classified_images_test",transform=data_transform_test)
# test_dataset = torchvision.datasets.CIFAR10("dataset", train=False, transform=data_transform, download=True)
test_dataloader = DataLoader(dataset=test_dataset,batch_size=Batch_size,shuffle=True,num_workers=2)

# 长度 = 数据集个数 / batch_size
# print(len(train_dataloader))

# 获取数据集类别数量
classes = test_dataset.classes

# 初始化混淆矩阵
cnf_matrix = np.zeros([len(classes), len(classes)])


In [4]:
import torchvision.models as models

# 导入预训练的 InceptionV3 模型
model = models.inception_v3(pretrained=True)

model.fc = nn.Linear(model.fc.in_features, len(classes))
# 如果GPU可用，利用GPU进行训练
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model=model.to(device)
model.to('cuda')



Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

In [5]:
from torch.optim import lr_scheduler

# 4. 损失函数
loss_fn = nn.CrossEntropyLoss()


# 学习率
learning_rate = 0.001
# 5. 优化器
# 定义优化器（SGD：随机梯度下降）
# optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# 学习率衰减⽅法：学习率每隔 step_size 个 epoch 变为原来的 gamma
lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1)



# 训练轮数
epoch = 100

# 保存训练过程中的loss和精度
train_acc_lst, test_acc_lst = [], []
train_loss_lst, tset_loss_lst = [], []

# 记录训练过程中最大的精度
max_train_acc = 0
max_test_acc = 0


In [6]:
import numpy as np
# 单通道转为三通道
def transfer_channel(image):
    image = np.array(image)
    image = image.transpose((1, 0, 2, 3))             # array 转置
    image = np.concatenate((image, image, image), axis=0)
    image = image.transpose((1, 0, 2, 3))     # array 转置回来
    image = torch.tensor(image)               # 将 numpy 数据格式转为 tensor
    return image


In [7]:
def compute_accuracy_and_loss(model, dataset, data_loader, device):
    correct, total = .0, .0
    for i, (features, targets) in enumerate(data_loader):
        # 通道转换
        if features.size(1) == 1:
            features = transfer_channel(features)
        features = features.to(device)
        targets = targets.to(device)
        output = model(features)
        currnet_loss = loss_fn(output, targets)
        # 求预测结果精确度之和
        # argmax:求最大值的下标，1按行求，0按列求
#         correct += (output.argmax(1) == targets).sum()
        
        _, predicted_labels = torch.max(output, 1)
        correct += (predicted_labels == targets).sum()
        
        # 更新混淆矩阵数据
        for idx in range(len(targets)):
            cnf_matrix[targets[idx]][predicted_labels[idx]] += 1
        
        total += targets.size(0)
        
    return float(correct) * 100 / len(dataset), currnet_loss.item()


In [8]:
import time
start_time = time.time()

print(model)

for i in range(epoch):
    print("---------开始第{}/{}轮训练，本轮学习率为：{}---------".format((i + 1), epoch, lr_scheduler.get_last_lr()))
    # 记录每轮训练批次数，每100次进行一次输出
    count_train = 0
    
    # 训练步骤开始
    model.train() # 将网络设置为训练模式，当网络包含 Dropout, BatchNorm时必须设置，其他时候无所谓
    for (features, targets) in train_dataloader:
        # 通道转换
        if features.size(1) == 1:
            features = transfer_channel(features)
        # 将图像和标签移动到指定设备上
        features = features.to(device)
        targets = targets.to(device)
        
        # 梯度清零，也就是把loss关于weight的导数变成0.
        # 进⾏下⼀次batch梯度计算的时候，前⼀个batch的梯度计算结果，没有保留的必要了。所以在下⼀次梯度更新的时候，先使⽤optimizer.zero_grad把梯度信息设置为0。
        optimizer.zero_grad()
        
        # 获取网络输出
        output = model(features)
        output=output.logits
        # 获取损失
        loss = loss_fn(output, targets)
        
        # 反向传播
        loss.backward()
        # 训练
        optimizer.step()
        # 纪录训练次数
        count_train += 1
        # item()函数会直接输出值，比如tensor(5),会输出5
        if count_train % 100 == 0:
            # 记录时间
            end_time = time.time()
            print(f"训练批次{count_train}/{len(train_dataloader)}，loss：{loss.item():.3f}，用时：{(end_time - start_time):.2f}" )

    # 将网络设置为测试模式，当网络包含 Dropout, BatchNorm时必须设置，其他时候无所谓
    model.eval()
    with torch.no_grad():
        # 计算训练精度
        train_accuracy, train_loss = compute_accuracy_and_loss(model, train_dataset, train_dataloader, device=device)
        # 更新最高精度
        if train_accuracy > max_train_acc:
            max_train_acc = train_accuracy
        
        # 计算测试精度
        test_accuracy, test_loss = compute_accuracy_and_loss(model, test_dataset, test_dataloader, device=device)
        # 更新最高精度
        if test_accuracy > max_test_acc:
            max_test_acc = test_accuracy
        
        # 收集训练过程精度和loss
        train_loss_lst.append(train_loss)
        train_acc_lst.append(train_accuracy)
        tset_loss_lst.append(test_loss)
        test_acc_lst.append(test_accuracy)
        
        print(f'Train Loss.: {train_loss:.2f}' f' | Validation Loss.: {test_loss:.2f}')
        print(f'Train Acc.: {train_accuracy:.2f}%' f' | Validation Acc.: {test_accuracy:.2f}%')

    # 训练计时
    elapsed = (time.time() - start_time) / 60
    print(f'本轮训练累计用时: {elapsed:.2f} min')

    # 保存达标的训练的模型
    if test_accuracy > 70:
        torch.save(model, "save_model_rs_dataset/train_model_{}.pth".format(i))
        print("第{}次训练模型已保存".format(i + 1))
    
    # 更新学习率
    lr_scheduler.step()

print('DONE！')


Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

KeyboardInterrupt: 

In [11]:
# 定义保存路径
save_path = r'C:\Users\lhc25\Desktop\DR\model.pth'

# 保存模型的状态字典（state_dict）
torch.save(model.state_dict(), save_path)

In [13]:
load_path=r"C:\Users\lhc25\Desktop\DR\train_model_99.pth"
model=torch.load(load_path)

In [16]:
import torch
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

# 假设测试集图像存储在 test_images 文件夹中
test_data_folder = r"C:\Users\lhc25\Desktop\DR\googlenet"



# 创建测试集数据集
test_dataset = ImageFolder(root=test_data_folder, transform=data_transform_test)

# 创建 DataLoader，注意设置 shuffle=False
test_dataloader = DataLoader(test_dataset, batch_size=Batch_size, shuffle=False, num_workers=2)

# 将模型移到 GPU 上（如果有的话）
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 将模型设置为评估模式
model.eval()

# 存储预测结果的列表
all_predictions = []

# 不进行梯度计算
with torch.no_grad():
    for inputs, _ in test_dataloader:
        inputs = inputs.to(device)

        # 获取模型的输出
        outputs = model(inputs)

        # 在预测维度上取最大值
        _, predictions = torch.max(outputs, 1)

        # 将预测结果添加到列表中
        all_predictions.extend(predictions.cpu().numpy())

# all_predictions 包含了测试集的所有预测结果
print(all_predictions)


[3, 0, 2, 2, 2, 0, 3, 0, 3, 0, 0, 2, 3, 3, 0, 0, 3, 0, 0, 0, 2, 2, 3, 0, 3, 3, 0, 3, 3, 2, 3, 0, 2, 3, 0, 3, 3, 0, 0, 3, 0, 3, 2, 3, 3, 0, 0, 3, 3, 3, 3, 0, 0, 3, 3, 0, 3, 0, 0, 0, 0, 2, 0, 2, 0, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 3, 0, 2, 2, 0, 3, 0, 3, 3, 3, 0, 3, 3, 3, 0, 3, 2, 0, 2, 3, 3, 0, 0, 3, 0, 3, 0, 0, 0, 0, 3, 0, 2, 0, 3, 2, 0, 2, 0, 3, 0, 3, 0, 3, 2, 2, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 3, 3, 3, 3, 3, 3, 0, 3, 0, 0, 0, 0, 3, 3, 3, 3, 0, 2, 0, 0, 2, 2, 3, 2, 0, 2, 0, 3, 1, 0, 2, 3, 3, 3, 0, 3, 2, 3, 1, 0, 0, 0, 0, 0, 3, 0, 2, 3, 0, 0, 0, 3, 3, 0, 0, 2, 0, 0, 2, 3, 3, 2, 2, 0, 2, 3, 0, 0, 3, 3, 3, 2, 0, 2, 0, 2, 0, 2, 0, 3, 2, 3, 0, 2, 0, 0, 2, 0, 3, 3, 0, 3, 0, 0, 3, 0, 2, 2, 0, 0, 3, 3, 0, 3, 0, 1, 3, 3, 0, 3, 3, 3, 3, 0, 2, 2, 2, 0, 0, 3, 3, 2, 0, 3, 3, 2, 0, 2, 0, 0, 0, 0, 3, 0, 3, 0, 0, 2, 3, 0, 0, 0, 2, 0, 2, 0, 0, 3, 0, 2, 2, 0, 3, 2, 3, 0, 0, 3, 0, 3, 0, 0, 0, 3, 3, 2, 3, 3]


In [23]:
import pandas as pd

# 假设 all_predictions 是包含测试集每个图像预测结果的列表
# 假设 test_dataset 是你的测试集数据集对象

# 获取测试集图像文件名列表
image_file_names = [path.split('/')[-1] for path, _ in test_dataset.imgs]

# # 创建一个包含图片名和预测结果的 DataFrame
# df = pd.DataFrame({'Image': image_file_names, 'Predicted_Class': all_predictions})

# 定义输出 CSV 文件路径
csv_output_path = r"C:\Users\lhc25\Desktop\DR\submission.csv"

# 获取测试集图像文件名列表（不包含扩展名）
image_file_names = [os.path.splitext(os.path.basename(path))[0] for path, _ in test_dataset.imgs]

# 创建一个包含图片名和预测结果的 DataFrame
df = pd.DataFrame({'Image': image_file_names, 'Predict': all_predictions})



# 将 DataFrame 写入 CSV 文件
df.to_csv(csv_output_path, index=False)

# 打印 DataFrame
print(df)


    Image  Predict
0       1        3
1      10        0
2     100        2
3     101        2
4     102        2
..    ...      ...
295    95        3
296    96        3
297    97        2
298    98        3
299    99        3

[300 rows x 2 columns]


In [None]:
import matplotlib.pyplot as plt


plt.figure(dpi=480,figsize=(12,5))

# 训练损失和测试损失关系图
plt.plot(range(1, epoch + 1), train_loss_lst, label='Training loss')
plt.plot(range(1, epoch + 1), tset_loss_lst, label='Validation loss')
plt.legend(loc='upper right')
plt.ylabel('Cross entropy')
plt.xlabel('Epoch')
plt.show()


plt.figure(dpi=480,figsize=(12,5))
# 训练精度和测试精度关系图
plt.plot(range(1, epoch + 1), train_acc_lst, label='Training accuracy')
plt.plot(range(1, epoch + 1), test_acc_lst, label='Validation accuracy')
plt.legend(loc='upper left')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.show()


print("最大训练精度为：", max_train_acc)
print("最大测试精度为：", max_test_acc)


In [None]:
import itertools
import matplotlib.pyplot as plt
import numpy as np


# 绘制混淆矩阵
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """
    - cm : 计算出的混淆矩阵的值
    - classes : 混淆矩阵中每一行每一列对应的列
    - normalize : True:显示百分比, False:显示个数
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
#         print("显示百分比：")
        np.set_printoptions(formatter={'float': '{: 0.2f}'.format})
#         print(cm)
#     else:
#         print('显示具体数字：')
#         print(cm)
    plt.figure(dpi=320,figsize=(16,16))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    # matplotlib版本问题，如果不加下面这行代码，则绘制的混淆矩阵上下只能显示一半，有的版本的matplotlib不需要下面的代码，分别试一下即可
    plt.ylim(len(classes) - 0.5, -0.5)
    # fmt = '.2f' if normalize else 'd'
    fmt = '.2f'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")
    
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()


# 第一种情况：显示百分比
plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True, title='Normalized confusion matrix')

# 第二种情况：显示数字
plot_confusion_matrix(cnf_matrix, classes=classes, normalize=False, title='Normalized confusion matrix')


In [None]:

from PIL import features
from torch.utils.data import DataLoader

import torch
import torchvision
from torchvision import datasets

from torchvision.transforms import transforms

import matplotlib.pyplot as plt

# 对图像进行尺寸变换，因为网络要求的输入是64*64，并且是tensor类型
custom_transform = transforms.Compose([transforms.Resize([224, 224]),
                                       transforms.ToTensor()])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = torchvision.models.vgg16().to(device)
# map_location:指定设备，cpu或者GPU
model.load_state_dict(torch.load("./save_model_rs_dataset/vgg16_train_model_38.pth", map_location="cpu"))

val_dataset = datasets.ImageFolder(
    root=r'E:\machine learning\Deep_learning\deep_learning\PyTorch\code\some_models\vgg-demo\VGG16\satelite\Satellite_Image_Classification\val',
    transform=custom_transform
)
classes = val_dataset.classes
val_loader = DataLoader(dataset=val_dataset,
                        batch_size=16,
                        shuffle=True)

for features, targets in val_loader:
    predictions = model.forward(features.to(device))
    predictions = torch.argmax(predictions, dim=1)
    plt.figure(figsize=(15, 15))  # 设置窗口大小

    for i in range(len(features)):
        plt.subplot(4, 4, i + 1)
        plt.title("Prediction:{}\nTarget:{}".format(classes[predictions[i]], classes[targets[i]]))
        # 解决报错：Invalid shape (3, 224, 224) for image data
        # 问题产生的原因是由于matplotlib.pyplot 使用时传入的数组型或Tensor型参数应为 img=（224，224，3）这种类型。
        # 其中img[0],img[1]为数组或张量的长与宽,img[2]为维度，如‘RPG’为3
        img = features[i].swapaxes(0, 1)
        img = img.swapaxes(1, 2)
        plt.imshow(img)
        # 关闭坐标轴
        plt.axis('off')

    plt.show()
    break
