In [2]:
import torch
from torch import nn
from utils import RecDataset
from torch.utils.data import DataLoader

In [3]:
# RecDataset comes from the project-local `utils` module; the two arguments
# presumably select the dataset name and the split -- TODO confirm there.
rec_dataset = RecDataset('CVL', 'train')
# Serve one sample per batch, in a freshly shuffled order on every pass.
dataloader = DataLoader(dataset=rec_dataset, shuffle=True, batch_size=1)

In [4]:
# Pull a single batch to sanity-check tensor shapes. Nothing is printed when
# the loader yields no batches.
first_batch = next(iter(dataloader), None)
if first_batch is not None:
    data, label = first_batch
    print(data.shape)
    print(label.shape)

torch.Size([1, 1, 128, 128])
torch.Size([1, 64, 128])


In [5]:
class CNN(nn.Module):
    """Five-stage convolutional feature extractor.

    Each stage is a 3x3 convolution with padding 1 (spatial size is kept),
    followed by ReLU and a 2x2 max-pool that halves height and width. Channel
    width grows 1 -> 32 -> 64 -> 128 -> 256 -> 512. A global average pool then
    collapses every feature map to a single value, so the network returns one
    512-element vector per sample.
    """

    def __init__(self):
        super().__init__()
        # Channels entering stage 1, then channels leaving stages 1..5.
        widths = (1, 32, 64, 128, 256, 512)
        # Layers are registered as conv1 .. conv5, in that order.
        for stage, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"conv{stage}", nn.Conv2d(c_in, c_out, kernel_size=3, padding=1))
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()
        self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, x):
        """Map a (batch, 1, H, W) tensor to a (batch, 512) feature tensor.

        For a 1x128x128 input the per-sample shapes after each stage are
        32x64x64, 64x32x32, 128x16x16, 256x8x8 and 512x4x4, then 512x1x1
        after global average pooling.
        """
        stages = (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5)
        for conv in stages:
            x = self.pool(self.relu(conv(x)))
        pooled = self.adaptive_pool(x)
        # Drop the trailing 1x1 spatial dims, keeping the batch axis.
        batch = pooled.size(0)
        return pooled.view(batch, -1)


cnn = CNN()
print(cnn)

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU()
  (adaptive_pool): AdaptiveAvgPool2d(output_size=(1, 1))
)


In [9]:
# Two-layer LSTM taking 512-dim inputs with a 256-dim hidden state.
# batch_first=True means inputs are laid out as (batch, seq, feature).
lstm = nn.LSTM(512, 256, 2, batch_first=True)

# Random stand-in for a single 1x128x128 input image.
data = torch.randn(1, 1, 128, 128)
output = cnn(data)            # (batch, 512) feature vector from the CNN
print(output.shape)
# Insert a length-1 sequence axis -> (batch, 1, 512). unsqueeze derives the
# shape from the tensor itself, so this keeps working if the batch size
# changes, unlike a hard-coded view(1, 1, 512).
output = output.unsqueeze(1)
# Keep the per-step outputs; the final (hidden, cell) state pair is discarded.
output, _ = lstm(output)

torch.Size([1, 512])


In [1]:
import torch
import torch.nn as nn

# --- Embedding layer example ---
vocab_size = 10      # pretend the vocabulary holds 10 distinct words
embedding_dim = 3    # every word is mapped to a 3-dimensional vector
embedding_layer = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)

# The input is a list of word indices; the layer returns one vector per index.
word_indices = torch.tensor([0, 2, 4, 8], dtype=torch.long)
embeddings = embedding_layer(word_indices)
print("Embeddings:", embeddings)

# --- Fully connected layer example ---
input_dim = 3     # input dimension
output_dim = 2    # output dimension
fc_layer = nn.Linear(in_features=input_dim, out_features=output_dim)

# Push one randomly generated 3-dimensional vector through the layer.
input_vector = torch.randn((1, input_dim))
output_vector = fc_layer(input_vector)
print("Output from FC layer:", output_vector)

Embeddings: tensor([[-1.8311,  1.2062,  0.1030],
        [-0.0183, -1.5122,  0.4178],
        [ 1.3398,  0.9618,  0.6971],
        [ 1.5470,  0.3003, -1.1612]], grad_fn=<EmbeddingBackward0>)
Output from FC layer: tensor([[0.1274, 0.2353]], grad_fn=<AddmmBackward0>)
