In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvBlock(nn.Module):
    """Encoder building block: Conv2d -> BatchNorm2d -> ReLU.

    The convolution uses a 3x3 kernel with padding 1, so with the default
    ``stride=2`` each spatial dimension is halved (rounded up).

    Args:
        in_channels: Number of channels in the input feature map.
        out_channels: Number of channels produced by the convolution.
        stride: Stride of the convolution (default 2).
    """

    def __init__(self, in_channels, out_channels, stride=2):
        super().__init__()
        # Sub-modules are created in conv -> bn -> relu order; the attribute
        # names double as the state_dict key prefixes.
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(bn(conv(x)))`` for a 4-D input tensor ``x``."""
        return self.relu(self.bn(self.conv(x)))

class UpConvBlock(nn.Module):
    """Decoder building block: ConvTranspose2d -> BatchNorm2d -> ReLU.

    The transposed convolution uses a 3x3 kernel with padding 1. With the
    default ``stride=2`` and an output padding of 1 the spatial size is
    exactly doubled.

    Args:
        in_channels: Number of channels in the input feature map.
        out_channels: Number of channels produced by the transposed convolution.
        stride: Stride of the transposed convolution (default 2). May be an
            int or a per-dimension tuple/list.
    """
    def __init__(self, in_channels, out_channels, stride=2):
        super(UpConvBlock, self).__init__()
        # PyTorch requires output_padding to be smaller than the stride (or
        # dilation), so a fixed output_padding=1 raises for stride=1. Derive it
        # from the stride instead: 1 for every stride >= 2 (unchanged
        # behaviour), 0 for stride 1.
        if isinstance(stride, (tuple, list)):
            output_padding = tuple(min(1, s - 1) for s in stride)
        else:
            output_padding = min(1, stride - 1)
        self.upconv = nn.ConvTranspose2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride,
            padding=1, output_padding=output_padding
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
    def forward(self, x):
        """Return ``relu(bn(upconv(x)))`` for a 4-D input tensor ``x``."""
        x = self.upconv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x

class SeparableConv2d(nn.Module):
    """Depthwise-separable convolution: per-channel 3x3 conv, then 1x1 conv.

    Both convolutions are bias-free. The depthwise stage keeps the channel
    count and (thanks to padding 1) the spatial size; the pointwise stage maps
    ``in_channels`` to ``out_channels``.

    Args:
        in_channels: Number of channels in the input feature map.
        out_channels: Number of channels in the output feature map.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # groups == in_channels gives every input channel its own 3x3 filter.
        self.depthwise = nn.Conv2d(
            in_channels, in_channels, kernel_size=3, padding=1, groups=in_channels, bias=False
        )
        # The 1x1 convolution mixes information across channels.
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)

    def forward(self, x):
        """Apply the depthwise convolution followed by the pointwise one."""
        return self.pointwise(self.depthwise(x))

class ColorizationUNet(nn.Module):
    """U-Net that maps a 1-channel grayscale image to a 3-channel colour image.

    Five stride-2 ``ConvBlock`` stages form the encoder and five stride-2
    ``UpConvBlock`` stages form the decoder, with skip connections between
    stages of matching resolution. The grayscale input is concatenated with
    the decoder output and passed through a depthwise-separable convolution
    and a sigmoid, so every output value lies in (0, 1).

    Each encoder stage rounds the spatial size up when halving, while each
    decoder stage doubles it exactly, so a decoder map can be one pixel larger
    than its skip tensor when the input height/width is not a multiple of 32.
    Decoder maps are therefore cropped to the skip tensor's size before
    concatenation; for sizes that are multiples of 32 the crop is a no-op.
    """
    def __init__(self):
        super(ColorizationUNet, self).__init__()
        # Encoder
        self.enc1 = ConvBlock(1, 128)         # input is a 1-channel grayscale image
        self.enc2 = ConvBlock(128, 128)
        self.enc3 = ConvBlock(128, 256)
        self.enc4 = ConvBlock(256, 512)
        self.enc5 = ConvBlock(512, 512)
        # Decoder
        self.dec1 = UpConvBlock(512, 512)
        self.dec2 = UpConvBlock(512 + 512, 256)  # skip connection doubles the input channels
        self.dec3 = UpConvBlock(256 + 256, 128)
        self.dec4 = UpConvBlock(128 + 128, 128)
        self.dec5 = UpConvBlock(128 + 128, 3)    # 3-channel colour output
        # Final depthwise-separable convolution over (decoder output + input)
        self.final_conv = SeparableConv2d(3 + 1, 3)
        # Output activation
        self.sigmoid = nn.Sigmoid()

    @staticmethod
    def _crop_like(tensor, reference):
        """Crop ``tensor`` (top-left anchored) to the spatial size of ``reference``."""
        if tensor.shape[-2:] == reference.shape[-2:]:
            return tensor
        height, width = reference.shape[-2:]
        return tensor[..., :height, :width]

    def forward(self, x):
        """Colourise a batch of grayscale images.

        Args:
            x: Tensor of shape (batch_size, 1, H, W).

        Returns:
            Tensor of shape (batch_size, 3, H, W) with values in (0, 1).
        """
        # Encoder path (sizes in the comments are for a 160x160 input)
        enc1 = self.enc1(x)    # (batch_size, 128, 80, 80)
        enc2 = self.enc2(enc1) # (batch_size, 128, 40, 40)
        enc3 = self.enc3(enc2) # (batch_size, 256, 20, 20)
        enc4 = self.enc4(enc3) # (batch_size, 512, 10, 10)
        enc5 = self.enc5(enc4) # (batch_size, 512, 5, 5)
        # Decoder path: upsample, align with the skip tensor, concatenate
        dec1 = self._crop_like(self.dec1(enc5), enc4)   # (batch_size, 512, 10, 10)
        dec1 = torch.cat((dec1, enc4), dim=1)           # 1024 channels
        dec2 = self._crop_like(self.dec2(dec1), enc3)   # (batch_size, 256, 20, 20)
        dec2 = torch.cat((dec2, enc3), dim=1)           # 512 channels
        dec3 = self._crop_like(self.dec3(dec2), enc2)   # (batch_size, 128, 40, 40)
        dec3 = torch.cat((dec3, enc2), dim=1)           # 256 channels
        dec4 = self._crop_like(self.dec4(dec3), enc1)   # (batch_size, 128, 80, 80)
        dec4 = torch.cat((dec4, enc1), dim=1)           # 256 channels
        dec5 = self._crop_like(self.dec5(dec4), x)      # (batch_size, 3, 160, 160)
        # Concatenate the original grayscale input with the decoder output
        dec5 = torch.cat((dec5, x), dim=1)              # 4 channels
        # Final convolution and squashing to (0, 1)
        out = self.final_conv(dec5)                     # (batch_size, 3, 160, 160)
        out = self.sigmoid(out)
        return out

# Example usage: smoke-test the network's output shape on random data

model = ColorizationUNet()
# Build a dummy grayscale batch of shape (1, 1, 160, 160)
input_image = torch.randn(1, 1, 160, 160)
# Forward pass
output_image = model(input_image)
print(output_image.shape)  # expected shape: (1, 3, 160, 160)


torch.Size([1, 3, 160, 160])


In [2]:
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms


In [3]:
class ColorizationDataset(Dataset):
    """Paired grayscale/colour image dataset for colourisation training.

    The files of the two directories are paired by position after sorting
    each listing by name, so both directories are expected to contain the same
    number of files in matching order. Hidden entries (names starting with
    ".", such as ``.DS_Store`` or ``.ipynb_checkpoints``) and sub-directories
    are ignored, because they are not images and would otherwise break the
    count/pairing or fail to open.
    """

    def __init__(self, color_dir, gray_dir, transform=None):
        """
        Args:
            color_dir (string): Path to the directory of colour images.
            gray_dir (string): Path to the directory of grayscale images.
            transform (callable, optional): Transform applied to each image.

        Raises:
            AssertionError: If the two directories hold a different number
                of files.
        """
        self.color_dir = color_dir
        self.gray_dir = gray_dir
        self.transform = transform

        # Sorted file names of both directories (hidden entries and folders skipped)
        self.color_images = self._list_files(color_dir)
        self.gray_images = self._list_files(gray_dir)

        # Both directories must hold the same number of images
        assert len(self.color_images) == len(self.gray_images), "彩色和灰度图像数量不匹配"

    @staticmethod
    def _list_files(directory):
        """Return the sorted names of the regular, non-hidden files in ``directory``."""
        return sorted(
            name for name in os.listdir(directory)
            if not name.startswith(".")
            and os.path.isfile(os.path.join(directory, name))
        )

    def __len__(self):
        """Return the number of (gray, colour) pairs."""
        return len(self.color_images)

    def __getitem__(self, idx):
        """Return the ``(gray_image, color_image)`` pair at position ``idx``."""
        color_img_path = os.path.join(self.color_dir, self.color_images[idx])
        gray_img_path = os.path.join(self.gray_dir, self.gray_images[idx])

        # Open inside a context manager so the file handle is always released;
        # convert() returns a fully loaded copy that outlives the handle.
        with Image.open(color_img_path) as img:
            color_image = img.convert("RGB")
        with Image.open(gray_img_path) as img:
            gray_image = img.convert("L")  # single-channel grayscale

        # The transform is applied to each image independently, so a transform
        # with random behaviour would not keep the pair aligned.
        if self.transform:
            color_image = self.transform(color_image)
            gray_image = self.transform(gray_image)

        # No explicit channel dimension is added here: with the ToTensor-based
        # transform used in this notebook the gray sample already comes out as
        # (1, H, W).
        return gray_image, color_image


In [4]:
# Side length (in pixels) every image is resized to
IMAGE_SIZE = 160

# Transform applied to both the gray and the colour image of a pair
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),  # convert the image to a tensor scaled to [0, 1]
])

# Dataset locations
color_dir = './landscape/color'
gray_dir = './landscape/gray'

# Build the paired dataset
dataset = ColorizationDataset(color_dir, gray_dir, transform=transform)

# Build the data loader (reshuffled every epoch)
batch_size = 16
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [5]:
# Sanity check: shape of the grayscale tensor (element 0 of the pair) of sample 1
dataloader.dataset[1][0].shape

torch.Size([1, 160, 160])

In [6]:
# Make sure the cell that defines the model classes has been executed first
model = ColorizationUNet()
# Optional: resume from a saved checkpoint. NOTE: `device` is only defined in
# the training cell further down, so define it before uncommenting this line.
# model.load_state_dict(torch.load('colorization_epoch_10.pth', map_location=device))


In [7]:
# Mean squared error; the training loop applies it to (model output, colour target)
criterion = nn.MSELoss()


In [8]:
# Adam over all model parameters with a learning rate of 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


In [9]:
# Move the model to the GPU when one is available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

num_epochs = 50          # number of training epochs
save_every = 1           # checkpoint + preview interval, in epochs
num_preview_samples = 4  # maximum number of preview images saved per interval

# Create the output folders up front so the first save cannot fail on a
# fresh checkout where they do not exist yet.
os.makedirs('./checkpoints', exist_ok=True)
os.makedirs('record', exist_ok=True)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for gray_images, color_images in dataloader:
        # Move the batch to the training device
        gray_images = gray_images.to(device)
        color_images = color_images.to(device)

        # Forward pass
        outputs = model(gray_images)

        # Loss between predicted and target colour images
        loss = criterion(outputs, color_images)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    epoch_loss = running_loss / len(dataloader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

    # Periodically save a checkpoint and a few preview images
    if (epoch + 1) % save_every == 0:
        # Save the model weights
        print("Now is saving model")
        torch.save(model.state_dict(), f'./checkpoints/colorization_epoch_{epoch+1}.pth')

        print("Now is visualization part")
        # Colourise one batch in eval mode; model.train() at the top of the
        # next epoch switches back to training mode.
        model.eval()
        with torch.no_grad():
            sample_gray, _ = next(iter(dataloader))
            sample_gray = sample_gray.to(device)
            output_color = model(sample_gray)
            # Move the tensors back to the CPU and convert them to numpy arrays
            sample_gray = sample_gray.cpu().numpy()
            output_color = output_color.cpu().numpy()

            # Never index past the end of a batch smaller than the preview count
            preview_count = min(num_preview_samples, sample_gray.shape[0])

            # Save the previews into the 'record' folder
            for i in range(preview_count):
                # Grayscale input. vmin/vmax pin the colormap to the [0, 1]
                # range the transform comment states for ToTensor; without
                # them imsave rescales each image to its own min/max and
                # distorts its brightness.
                gray_image = sample_gray[i, 0]
                plt.imsave(f'record/epoch_{epoch+1}_sample_{i+1}_gray.png', gray_image, cmap='gray', vmin=0, vmax=1)

                # Predicted colour image, converted from (C, H, W) to (H, W, C)
                output_img = np.transpose(output_color[i], (1, 2, 0))
                plt.imsave(f'record/epoch_{epoch+1}_sample_{i+1}_color.png', output_img)


Epoch [1/50], Loss: 0.0648
Now is saving model
Now is visualization part
Epoch [2/50], Loss: 0.0487
Now is saving model
Now is visualization part
Epoch [3/50], Loss: 0.0352
Now is saving model
Now is visualization part
Epoch [4/50], Loss: 0.0253
Now is saving model
Now is visualization part
Epoch [5/50], Loss: 0.0179
Now is saving model
Now is visualization part
Epoch [6/50], Loss: 0.0133
Now is saving model
Now is visualization part
Epoch [7/50], Loss: 0.0105
Now is saving model
Now is visualization part
Epoch [8/50], Loss: 0.0087
Now is saving model
Now is visualization part
Epoch [9/50], Loss: 0.0074
Now is saving model
Now is visualization part
Epoch [10/50], Loss: 0.0066
Now is saving model
Now is visualization part
Epoch [11/50], Loss: 0.0060
Now is saving model
Now is visualization part
Epoch [12/50], Loss: 0.0054
Now is saving model
Now is visualization part
Epoch [13/50], Loss: 0.0049
Now is saving model
Now is visualization part
Epoch [14/50], Loss: 0.0046
Now is saving model

In [3]:
# Export this notebook to Markdown (the log below reports colorization.md being written)
!jupyter nbconvert --to markdown colorization.ipynb

[NbConvertApp] Converting notebook colorization.ipynb to markdown
[NbConvertApp] Writing 15840 bytes to colorization.md


In [1]:
import os

# Name of the current working directory; it is reused as the GitHub repository name
current_directory = os.getcwd()
folder_name = os.path.basename(current_directory)

# CMD commands to write, one list entry per step. Each entry is a batch
# comment line (starting with "::") followed by the command itself.
cmd_commands = [
    f':: 获取当前文件夹名\nfor %%I in ("%%cd%%") do set "folder_name={folder_name}"',
    f':: 使用当前文件夹名创建一个新的 GitHub 仓库\ngh repo create "{folder_name}" --public',
    ':: 初始化一个新的 Git 仓库\ngit init',
    f':: 将远程仓库添加为 origin\ngit remote add origin https://github.com/Republic1024/{folder_name}.git',
    ':: 从远程仓库的 main 分支拉取最新代码\ngit pull origin main',
    ':: 移除未跟踪的文件，例如 .DS_Store（Mac 系统生成的隐藏文件）\ndel /F /Q .DS_Store',
    # The step is documented as excluding .pth files, so pass an exclude
    # pathspec; a plain "git add ." would stage every saved checkpoint.
    ':: 将所有更改添加到暂存区，排除 .pth 文件\ngit add . ":(exclude)*.pth"',
    ':: 提交暂存区的更改并添加提交信息\ngit commit -m "Initial commit or merge or rebase remote changes"',
    # "git branch -M" renames the current branch; it does not rebase, so the
    # batch comment now says so.
    ':: 将当前分支重命名为 main\ngit branch -M main',
    ':: 将本地 main 分支的更改推送到远程 main 分支\ngit push -u origin main'
]

# Write the commands to run.bat. newline="\r\n" forces the CRLF line endings
# batch files expect, regardless of the platform this cell runs on.
with open("run.bat", "w", encoding="utf-8", newline="\r\n") as f:
    f.write("\n\n".join(cmd_commands))

print("run.bat 文件已生成！")

# 
# **任务说明**：
# 
# - 将项目内容完整翻译为中文。
# 
# - 根据项目的性质，自动生成适用于 GitHub 项目的 README.md 文件，形式可以是教程或使用说明（根据项目内容自行判断）。
# 
# - README.md 文件应包括以下部分（请不要处理末尾与 git push 相关的部分）：
# 
# - **项目简介**：简要说明项目的目的、功能和应用场景。
# 
# - **环境要求**：列出所需的软件、工具和安装步骤。
# 
# - **神经网络的作用和原理**：解释神经网络的功能和工作原理。（如果存在神经网络部分的话）
# 
# - **代码详解**：详细解释代码的各部分功能、输入、输出和执行步骤，必要时进行逐步讲解。
# 
# - **运行示例**：提供运行代码的示例，展示执行步骤和结果输出。
# 
# - **命令行用法（如果适用）**：如有必要，展示在命令行下的使用方法和相关参数说明。
# 
# - **输出结果说明**：解析代码运行后的输出结果，解释其含义。
# 
# - 确保文档结构清晰，语言流畅，帮助读者快速了解和使用项目。
# 



run.bat 文件已生成！
