**Imports**

In [None]:
import os
from torch.utils.data import Dataset, DataLoader

**Datasets**

In [None]:
class RbpCompeteSequenceDataset(Dataset):
    """Map-style dataset of ``(sequence, line)`` pairs.

    Every sequence read from ``sequence_file`` is paired with every line of
    every file in ``data_dir`` whose name starts with ``RBP`` and ends with
    ``.txt``. All pairs are built eagerly in ``__init__`` and kept in memory.

    Attributes:
        data_dir: Directory that is scanned for ``RBP*.txt`` files.
        sequence_file: Path of the file holding one sequence per line.
        files: Matching file names (not full paths), in ``os.listdir`` order.
        sequences: Whitespace-stripped lines of ``sequence_file``.
        data: List of ``(sequence, line)`` tuples served by ``__getitem__``.
    """

    def __init__(self, data_dir, sequence_file):
        """Scan ``data_dir``, read ``sequence_file`` and build all pairs.

        Args:
            data_dir: Directory containing the ``RBP*.txt`` files.
            sequence_file: Path to a text file with one sequence per line.
        """
        self.data_dir = data_dir
        self.sequence_file = sequence_file
        self.files = [f for f in os.listdir(data_dir) if f.startswith('RBP') and f.endswith('.txt')]
        self.sequences = self._load_sequences(sequence_file)
        self.data = self._load_data()

    def _load_sequences(self, sequence_file):
        """Return the lines of ``sequence_file`` with surrounding whitespace stripped."""
        with open(sequence_file, 'r') as f:
            return [line.strip() for line in f]

    def _load_data(self):
        """Build the ``(sequence, line)`` pairs for every file in ``self.files``.

        Pairs are ordered by file, then by sequence, then by line within the
        file. Lines are stored exactly as read, i.e. including their trailing
        newline.

        NOTE(review): this is a full cross product of sequences and file
        lines. If each file is meant to hold one value per sequence, a
        positional pairing may have been intended -- confirm with the data
        format before changing it.
        """
        # Without sequences there is nothing to pair, so no file is opened.
        if not self.sequences:
            return []

        data = []
        for file in self.files:
            # Read each file once; its content does not depend on the
            # sequence, so re-opening it per sequence is wasted I/O.
            with open(os.path.join(self.data_dir, file), 'r') as f:
                lines = f.readlines()
            for seq in self.sequences:
                data.extend((seq, line) for line in lines)
        return data

    def __len__(self):
        """Return the number of ``(sequence, line)`` pairs."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sequence, line)`` pair stored at position ``idx``."""
        x, y = self.data[idx]
        return x, y

In [None]:
class RbpSelexDataset(Dataset):
    """Map-style dataset of ``(sequence, label)`` pairs read from ``.txt`` files.

    Each non-blank line of every ``.txt`` file in ``directory`` is split on
    ``','`` into exactly two fields; the first is kept as the sequence and the
    second is discarded. The label is the integer found in the second
    underscore-separated token of the file's base name (``<prefix>_<label>...``).

    Attributes:
        directory: Directory that is scanned for ``.txt`` files.
        files: Full paths of the matching files, in ``os.listdir`` order.
        data: List of ``(sequence, label)`` tuples served by ``__getitem__``.
    """

    def __init__(self, directory):
        """Read every ``.txt`` file in ``directory`` into memory.

        Args:
            directory: Directory containing the ``.txt`` files.

        Raises:
            ValueError: If a non-blank line does not have exactly two
                comma-separated fields, or the label token is not an integer.
            IndexError: If a file with content has no ``'_'`` in its name.
        """
        self.directory = directory
        self.files = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.txt')]
        self.data = []

        for path in self.files:
            with open(path, 'r') as f:
                # Blank lines (e.g. a trailing empty line) carry no record.
                rows = [row for row in (line.strip() for line in f) if row]
            if not rows:
                # Nothing to label; the file name is not inspected at all.
                continue

            # Parse the label from the base name only: splitting the full
            # path would pick up underscores in the directory components.
            stem = os.path.splitext(os.path.basename(path))[0]
            label = int(stem.split('_')[1])

            for row in rows:
                sequence, _ = row.split(',')
                self.data.append((sequence, label))

    def __len__(self):
        """Return the number of ``(sequence, label)`` pairs."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sequence, label)`` pair stored at position ``idx``."""
        x, y = self.data[idx]
        return x, y