In [8]:
import random

lista = list(range(10))
random.shuffle(lista)
print(lista)

[4, 5, 9, 8, 0, 6, 3, 7, 2, 1]


In [None]:
import torch

x = torch.arange(128).reshape(2, 128)  # Tensor de 1 x 128
sub_tensor = x[:, 10:20]  # Toma de la columna 10 a la 19
sub_tensor
mask = x > 50  # Máscara con True donde el valor es mayor que 50
mask
x[mask]

tensor([ 51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
         65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
         79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,
         93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
        107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
        121, 122, 123, 124, 125, 126, 127])

In [23]:
import random


def select_indices_no_adjacent(length, n_swap):
    candidatos = list(range(length))
    random.shuffle(candidatos)
    seleccionados = []
    print(candidatos)
    for idx in candidatos:
        if len(seleccionados) >= n_swap:
            break
        # Verificamos que no haya conflicto: que idx no sea adyacente a ninguno ya seleccionado.
        if any(abs(idx - s) == 1 for s in seleccionados):
            continue
        seleccionados.append(idx)
    return sorted(seleccionados)


# Ejemplo: seleccionar 3 índices de 0 a 9 sin consecutivos
print(select_indices_no_adjacent(10, 3))

[2, 3, 1, 4, 6, 8, 0, 7, 9, 5]
[2, 4, 6]


In [34]:
import torch
import random


def generate_swap_mask(batch_size, seq_length=10, n_swap=4):
    """
    Genera una máscara de swaps (bool) de forma [batch_size, seq_length - 1]
    con exactamente n_swap 'True' por batch sin que se solapen (índices consecutivos).
    """
    mask = torch.zeros((batch_size, seq_length - 1), dtype=torch.bool)

    # Para cada elemento del batch, seleccionamos n_swap índices válidos
    for b in range(batch_size):
        candidatos = list(range(seq_length - 1))
        random.shuffle(candidatos)
        seleccionados = []
        for idx in candidatos:
            if len(seleccionados) >= n_swap:
                break
            # Evitamos adyacencia
            if any(abs(idx - s) == 1 for s in seleccionados):
                continue
            seleccionados.append(idx)
        seleccionados.sort()
        # Asignamos los índices seleccionados en la máscara para el batch b
        mask[b, seleccionados] = True
    return mask


# Ejemplo simple:
batch_size = 1
n_swap = 4
mask = generate_swap_mask(batch_size, seq_length=10, n_swap=n_swap)
print(mask)
print(mask.sum(dim=1))  # Debe imprimir [10, 10]

tensor([[False,  True, False, False,  True, False, False,  True, False]])
tensor([3])


In [36]:
def check_no_adjacent(mask):
    # Recorre cada fila y chequea que no existan dos True consecutivos.
    for b in range(mask.size(0)):
        row = mask[b].int()
        print(row[:-1])
        print(row[1:])
        print(row[:-1] + row[1:])
        # Se hace un desplazamiento para comparar elemento i y i+1
        if ((row[:-1] + row[1:]) > 1).any():
            return False
    return True


print("Restricción sin adyacentes:", check_no_adjacent(mask))

tensor([0, 1, 0, 0, 1, 0, 0, 1], dtype=torch.int32)
tensor([1, 0, 0, 1, 0, 0, 1, 0], dtype=torch.int32)
tensor([1, 1, 0, 1, 1, 0, 1, 1], dtype=torch.int32)
Restricción sin adyacentes: True


In [6]:
import torch, random


def gen_mask_single(length, n):
    if n > (length + 1) // 2:
        raise ValueError("Número de swaps demasiado alto")
    # Selecciona índices sin consecutividad usando "stars and bars"
    indices = sorted(random.sample(range(length - n + 1), n))
    indices = [x + i for i, x in enumerate(indices)]
    mask = [False] * length
    for i in indices:
        mask[i] = True
    return mask


def gen_mask(batch, length, n):
    return torch.tensor(
        [gen_mask_single(length, n) for _ in range(batch)], dtype=torch.bool
    )


def apply_swap(x, mask):
    y = x.clone()
    for i in range(mask.shape[1]):
        idx = torch.where(mask[:, i])[0]
        if idx.numel():
            temp = y[idx, i].clone()
            y[idx, i] = y[idx, i + 1]
            y[idx, i + 1] = temp
    return y


batch = 1
n_swaps = 5  # Número exacto de swaps por secuencia (máximo 64 para 127 posiciones)
x = torch.stack([torch.arange(10) for _ in range(batch)], dim=0)
mask = gen_mask(batch, 9, n_swaps)
y = apply_swap(x, mask)

print("x:\n", x)
print("\nmask:\n", mask)
print("\ny (x con swaps):\n", y)

x:
 tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

mask:
 tensor([[ True, False,  True, False,  True, False,  True, False,  True]])

y (x con swaps):
 tensor([[1, 0, 3, 2, 5, 4, 7, 6, 9, 8]])


In [7]:
import torch, random


def gen_mask_single(length, n):
    if n > (length + 1) // 2:
        raise ValueError("Número de swaps demasiado alto")
    inds = sorted(random.sample(range(length - n + 1), n))
    inds = [x + i for i, x in enumerate(inds)]
    mask = [False] * length
    for i in inds:
        mask[i] = True
    return mask


def gen_mask(batch, emb, length, n):
    # 'length' es la longitud de la máscara (para 128 elementos, length=127)
    return torch.tensor(
        [[gen_mask_single(length, n) for _ in range(emb)] for _ in range(batch)],
        dtype=torch.bool,
    )


def apply_swap(x, mask):
    # x: [batch, embedding, 128], mask: [batch, embedding, 127]
    y = x.clone()
    for i in range(mask.shape[-1]):  # i de 0 a 126
        idx0, idx1 = torch.where(mask[:, :, i])
        if idx0.numel():
            tmp = y[idx0, idx1, i].clone()
            y[idx0, idx1, i] = y[idx0, idx1, i + 1]
            y[idx0, idx1, i + 1] = tmp
    return y


# Ejemplo
batch, emb, len_x = 1, 2, 10
n_swaps = 1  # Número exacto de swaps por secuencia (máximo 64 para 127 posiciones)
mask = gen_mask(batch, emb, len_x - 1, n_swaps)
x = torch.stack(
    [
        torch.stack([torch.arange(len_x) for _ in range(emb)], dim=0)
        for _ in range(batch)
    ],
    dim=0,
)
y = apply_swap(x, mask)

print("x:\n", x)
print("\nmask:\n", mask)
print("\ny (x con swaps):\n", y)

x:
 tensor([[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
         [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]])

mask:
 tensor([[[False, False, False, False,  True, False, False, False, False],
         [False, False, False, False, False, False, False,  True, False]]])

y (x con swaps):
 tensor([[[0, 1, 2, 3, 5, 4, 6, 7, 8, 9],
         [0, 1, 2, 3, 4, 5, 6, 8, 7, 9]]])


In [37]:
import torch as tr

x = tr.tensor(
    [
        [
            [1, 0, 0, 0, 0],
            [0, 0, 1, 0, 0],
            [0, 0, 0, 1, 0],
            [0, 1, 0, 0, 1],
        ]
    ]
)
x.shape

torch.Size([1, 4, 5])

In [62]:
import random

import torch as tr

x = tr.tensor(
    [
        [1, 0, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 1, 0, 0, 1],
    ]
)
x.shape

x_l = [_ for _ in range(x.shape[-1])]
random.shuffle(x_l)
v = [0, 1, 2, 3]

x2 = x
print(x2)
N = 2
for s in range(N):
    nt = x_l[-1]
    x_l.pop()
    random.shuffle(v)
    val = tr.zeros([4], dtype=tr.float)
    val[v[0]] = 1.0
    x[:, nt] = val
print(x2)

tensor([[1, 0, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 1, 0, 0, 1]])
tensor([[1, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1]])


In [74]:
def noise(x, N=0):
    assert N < x.shape[-1], "N should be lower than the shape of x (starting on 0)"
    x_l = [_ for _ in range(x.shape[-1])]
    random.shuffle(x_l)
    v = [0, 1, 2, 3]

    for _ in range(N):
        pos = x_l[-1]
        x_l.pop()
        random.shuffle(v)
        nt = tr.zeros([4], dtype=tr.float)
        nt[v[0]] = 1.0
        x[:, pos] = nt
    return x

In [76]:
x = tr.tensor(
    [
        [1, 0, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 1, 0, 0, 1],
    ]
)
print(x.shape)

x = noise(x, N=4)
print(x)

torch.Size([4, 5])
tensor([[0, 0, 0, 1, 0],
        [0, 0, 1, 0, 0],
        [1, 1, 0, 0, 1],
        [0, 0, 0, 0, 0]])


In [2]:
from src.seq2seq.dataset import SeqDataset
from torch.utils.data import DataLoader
import pandas as pd