# 1. Data Generation and Data Preparation

## 1.1 Data Generation

In [1]:
import numpy as np

np.random.seed(42)

# M noisy squares are generated, each described by N corner points.
M, N = 5, 4

direct_choices = [-1, 1]
basic_corners = np.array([[-1, -1], [-1, 1], [1, 1], [1, -1]])

squares, directions = [], []
for _ in range(M):
    # Draw the starting corner index first and the traversal step (+1 / -1)
    # second; the order of the random calls determines the seeded output.
    base = np.random.randint(N, size=1)  # index of the first corner
    d = np.random.choice(direct_choices, replace=True)
    # Visit base, base + d, base + 2d, ... (wrapping modulo N) and perturb
    # every visited corner with Gaussian noise scaled by 0.1.
    points = [
        basic_corners[(base + k * d) % N].reshape(-1) + np.random.randn(2) * 0.1
        for k in range(N)
    ]
    squares.append(points)
    directions.append(d)

# Dump every square: its corner points, then its direction, then a rule.
for points, direct in zip(squares, directions):
    print(*points, sep="\n")
    print(direct)
    print("-" * 50)

[0.88881199 1.03189022]
[ 1.02790413 -0.89894847]
[-1.05808781 -1.05251698]
[-1.05713802  0.90759172]
1
--------------------------------------------------
[-1.17249178  0.94377125]
[-1.10128311 -0.96857527]
[ 0.90919759 -1.14123037]
[1.14656488 0.97742237]
-1
--------------------------------------------------
[1.2559488 1.0394233]
[ 1.01222192 -1.05154357]
[-1.06002539 -0.90525602]
[-0.9708966   0.93644403]
1
--------------------------------------------------
[0.99865028 0.89422891]
[ 1.08225449 -1.12208436]
[-0.97911364 -1.19596701]
[-1.1328186   1.01968612]
1
--------------------------------------------------
[1.02186383 1.0881761 ]
[ 0.89909147 -1.15832942]
[-0.92262996 -1.05381417]
[-1.13466781  0.91194087]
1
--------------------------------------------------


In [2]:
import numpy as np

def generate_sequences(n=128, variable_len=False, seed=13):
    """Generate noisy sequences of the corner points of a square.

    Each sample walks the four corners listed in ``basic_corners`` starting
    from a random corner, either in table order (clockwise) or reversed
    (counterclockwise), and adds Gaussian noise scaled by 0.1 to every point.

    Args:
        n: number of sequences to generate.
        variable_len: if True, each sequence keeps only its first 2, 3 or 4
            points (length drawn at random); otherwise every sequence has
            4 points.
        seed: value passed to ``np.random.seed``; note that this reseeds the
            global NumPy random state.

    Returns:
        A tuple ``(points, directions)`` where ``points`` is a list of ``n``
        float arrays of shape ``(length, 2)`` and ``directions`` is an integer
        array of ``n`` values (1 = table order, 0 = reversed order).
    """
    # Corners listed clockwise: quadrants 3, 2, 1, 4.
    basic_corners = np.array([[-1, -1], [-1, 1], [1, 1], [1, -1]])

    np.random.seed(seed)
    bases = np.random.randint(4, size=n)
    lengths = [4] * n
    if variable_len:  # draw a length in {2, 3, 4} for every sequence
        lengths = np.random.randint(3, size=n) + 2
    directions = np.random.randint(2, size=n)

    points = []
    for start, direction, length in zip(bases, directions, lengths):
        # Rotate the corner table so that it begins at corner `start`.
        ring = basic_corners[(start + np.arange(4)) % 4]
        # direction 1 keeps the order, direction 0 reverses the whole ring.
        step = 1 if direction else -1
        clean = ring[::step][:length]
        points.append(clean + np.random.randn(length, 2) * 0.1)
    return points, directions

## 1.2 Data Preparation

### 1.2.1 定长的数据集

In [3]:
from torch.utils.data import DataLoader, TensorDataset
import torch


# Build the fixed-length training set: 128 sequences of 4 corner points each.
points, directions = generate_sequences(n=128, variable_len=False, seed=13)

# `points` is a Python list of equally shaped ndarrays. Convert it to a single
# ndarray before handing it to torch: creating a tensor directly from a list
# of ndarrays is very slow and makes PyTorch emit a UserWarning.
x_tensor = torch.as_tensor(np.array(points)).float()
# Labels become a float column vector with one row per sequence.
t_tensor = torch.as_tensor(directions).float().view(-1, 1)

train_dataset = TensorDataset(x_tensor, t_tensor)

# Mini-batches of 16 samples, reshuffled on every pass over the dataset.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

  x_tensor = torch.as_tensor(points).float()


### 1.2.2 变长数据集

变长数据集需要用到 `torch.nn.utils.rnn` 中的工具函数（例如 `pad_sequence`、`pack_padded_sequence`），本笔记暂不展开，此处略过。

# 2. Model Configuration & Training

模型配置与训练原本涉及 RNN、GRU 以及 LSTM，本笔记略过这些内容，直接进入 Transformer 部分。