In [None]:
class OCR_F(nn.Module):
    """CNN feature extractor followed by a bidirectional LSTM and a linear classifier.

    Three conv/ReLU/max-pool stages reduce a single-channel image to 42 feature
    maps at 1/8 of the input resolution. The flattened maps are fed to the LSTM
    as a one-step sequence and the LSTM output is projected to ``charset_size``
    scores.

    Args:
        charset_size: Number of output classes; must be below 256.
        hidden_size: Hidden size of the LSTM, per direction.
        lstm_layers: Number of stacked LSTM layers.
        input_size: ``(height, width)`` of the images passed to ``forward``.
            The default reproduces the previously hard-coded 42 * 103 * 212
            feature size.
    """

    def __init__(self, charset_size, hidden_size, lstm_layers, input_size=(825, 1697)):
        assert charset_size < 256
        super(OCR_F, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(1, 6, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(6, 15, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(15, 42, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # The padded 3x3 convolutions keep the spatial size; each 2x2 pool
        # floor-halves it, so three pools amount to a floor division by 8.
        height, width = input_size
        feature_dim = 42 * (height // 8) * (width // 8)

        self.hidden_size = hidden_size
        self.lstm_layers = lstm_layers
        self.lstm = nn.LSTM(feature_dim, hidden_size, lstm_layers, batch_first=True, bidirectional=True)
        # A bidirectional LSTM concatenates both directions -> 2 * hidden_size features.
        self.fc = nn.Linear(hidden_size * 2, charset_size)

    def forward(self, x):
        """Return class scores of shape ``(batch, charset_size)``.

        Args:
            x: Tensor of shape ``(batch, 1, height, width)`` whose spatial size
                matches the ``input_size`` given to the constructor.
        """
        # Per-sample shapes for the default 825 x 1697 input are noted on the right.
        x = self.pool1(F.relu(self.conv1(x)))  # [6, 412, 848]
        x = self.pool2(F.relu(self.conv2(x)))  # [15, 206, 424]
        x = self.pool3(F.relu(self.conv3(x)))  # [42, 103, 212]

        batch_size = x.size(0)
        # batch_first LSTM expects (batch, seq_len, features); the whole image is one step.
        x = x.view(batch_size, 1, -1)

        # Initial states need one entry per layer *and* per direction.
        h0 = torch.zeros(self.lstm_layers * 2, batch_size, self.hidden_size,
                         device=x.device, dtype=x.dtype)
        c0 = torch.zeros_like(h0)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [None]:
class OCR_S(nn.Module):
    """CNN encoder with an LSTM-cell decoder that emits one score vector per step.

    Three conv/ReLU/max-pool stages reduce a single-channel image to 42 feature
    maps at 1/8 of the input resolution. The flattened maps initialise the
    decoder's hidden state; the decoder is then unrolled for a caller-chosen
    number of steps, each step receiving the softmax of the previous step's
    scores (zeros at the first step).

    Args:
        charset_size: Size of the output vocabulary.
        hidden_size: The decoder state has ``2 * hidden_size`` units.
        lstm_layers: Stored for parity with ``OCR_F``; the single-cell decoder
            does not use it.
        input_size: ``(height, width)`` of the images passed to ``forward``.
            The default matches the 825 x 1697 images assumed by ``OCR_F``.
            NOTE: at the default size the flattened feature vector has
            42 * 103 * 212 entries, so ``init_hidden`` is a large layer.
    """

    def __init__(self, charset_size, hidden_size, lstm_layers, input_size=(825, 1697)):
        super(OCR_S, self).__init__()

        self.charset_size = charset_size
        self.hidden_size = hidden_size  # read by forward(); was never assigned before
        self.lstm_layers = lstm_layers

        # Convolutional layers
        self.conv1 = nn.Conv2d(1, 6, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(6, 15, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(15, 42, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Three 2x2 pools floor-divide each spatial dimension by 8.
        height, width = input_size
        feature_dim = 42 * (height // 8) * (width // 8)

        # Maps the flattened image features to the decoder's initial hidden
        # state so that the predictions actually depend on the image.
        self.init_hidden = nn.Linear(feature_dim, hidden_size * 2)

        # For sequence-to-sequence, we'll use an LSTM cell as the decoder
        self.decoder_lstm = nn.LSTMCell(charset_size, hidden_size * 2)
        self.character_prob = nn.Linear(hidden_size * 2, charset_size)

    def forward(self, x, max_output_length):
        """Decode ``max_output_length`` steps.

        Args:
            x: Tensor of shape ``(batch, 1, height, width)`` whose spatial size
                matches the ``input_size`` given to the constructor.
            max_output_length: Number of decoding steps to unroll.

        Returns:
            Unnormalised scores of shape
            ``(batch, max_output_length, charset_size)``.
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)
        batch_size = x.size(0)

        h = torch.tanh(self.init_hidden(x))
        c = torch.zeros_like(h)

        # The LSTM cell takes charset_size inputs: start from zeros, then feed
        # back the previous step's predicted distribution.
        step_input = x.new_zeros(batch_size, self.charset_size)
        predictions = []

        for _ in range(max_output_length):
            h, c = self.decoder_lstm(step_input, (h, c))
            pred = self.character_prob(h)
            predictions.append(pred)
            step_input = F.softmax(pred, dim=-1)

        if not predictions:
            # torch.stack rejects an empty list; return an empty sequence instead.
            return x.new_zeros(batch_size, 0, self.charset_size)
        return torch.stack(predictions, dim=1)

In [None]:
class TransformerEncoder(nn.Module):
    """Single self-attention block: 4-head attention plus a feed-forward sub-layer.

    The input is projected from ``input_size`` to ``hidden_size`` four times
    (query, key, value and the residual path), so the block also changes the
    feature width. Each sub-layer is followed by a residual add and a
    LayerNorm.

    Args:
        input_size: Feature width of the incoming sequence.
        hidden_size: Feature width of the output; must be divisible by the
            4 attention heads.
    """

    def __init__(self, input_size, hidden_size):
        super(TransformerEncoder, self).__init__()
        self.linear_q = nn.Linear(input_size, hidden_size)
        self.linear_k = nn.Linear(input_size, hidden_size)
        self.linear_v = nn.Linear(input_size, hidden_size)
        self.linear_x = nn.Linear(input_size, hidden_size)
        self.attention = nn.MultiheadAttention(hidden_size, num_heads=4, batch_first=True)
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
        )
        # One LayerNorm per sub-layer so their learned scale/shift are independent.
        self.norm = nn.LayerNorm(hidden_size)
        self.norm_ff = nn.LayerNorm(hidden_size)

    def forward(self, x):
        """Map ``(batch, seq_len, input_size)`` to ``(batch, seq_len, hidden_size)``."""
        q, k, v = self.linear_q(x), self.linear_k(x), self.linear_v(x)
        # MultiheadAttention returns (output, weights); only the output is needed.
        attn_out, _ = self.attention(q, k, v, need_weights=False)
        x = self.norm(self.linear_x(x) + attn_out)
        x = self.norm_ff(x + self.fc(x))
        return x

class MultiLayerTransformer(nn.Module):
    """Stack of ``num_layers`` ``TransformerEncoder`` blocks applied in sequence.

    Args:
        input_size: Feature width of the sequence given to the first layer.
        hidden_size: Feature width produced by every layer.
        num_layers: Number of stacked blocks; 0 yields an identity module.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super(MultiLayerTransformer, self).__init__()
        blocks = []
        for depth in range(num_layers):
            # Each TransformerEncoder emits hidden_size features, so only the
            # first block sees the raw input width.
            in_features = input_size if depth == 0 else hidden_size
            blocks.append(TransformerEncoder(in_features, hidden_size))
        self.layers = nn.ModuleList(blocks)

    def forward(self, x):
        """Pass ``x`` through every block in order and return the final output."""
        for layer in self.layers:
            x = layer(x)
        return x

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    """Dataset view over a sequence of two-element ``(text, entities)`` records."""

    def __init__(self, data):
        # Kept by reference; records are unpacked lazily in __getitem__.
        self.data = data

    def __len__(self):
        """Number of records in the wrapped sequence."""
        record_count = len(self.data)
        return record_count

    def __getitem__(self, index):
        """Return the record at ``index`` as a ``(text, entities)`` tuple."""
        record = self.data[index]
        text, entities = record
        return (text, entities)

def custom_collate(batch):
    """Transpose a batch of ``(text, entities)`` pairs into two parallel tuples.

    Returns ``(texts, entities)`` where each element is a tuple holding one
    item per sample; nothing is padded or converted to tensors.
    """
    columns = zip(*batch)
    texts, entities = columns
    return (texts, entities)

# Toy records: each entry pairs a text with its list of single-letter entity labels.
example_list = [
    ["String 1", list("ABCD")],
    ["String 2", list("EFGH")],
    ["String 3", list("DFGH")],
    ["String 4", list("FFGH")],
    ["String 5", list("GFGH")],
    ["String 6", list("HFGH")],
    ["String 7", list("IFGH")],
    ["String 8", list("JFGH")],
    # Add more entries as needed
]

custom_dataset = CustomDataset(data=example_list)

custom_dataloader = DataLoader(
    dataset=custom_dataset,
    batch_size=3,
    collate_fn=custom_collate,
)

# Eight records at batch_size=3 come out as batches of 3, 3 and 2.
for batch_texts, batch_entities in custom_dataloader:
    print("Batch Texts:", batch_texts)
    print("Batch Entities:", batch_entities)

Batch Texts: ('String 1', 'String 2', 'String 3')
Batch Entities: (['A', 'B', 'C', 'D'], ['E', 'F', 'G', 'H'], ['D', 'F', 'G', 'H'])
Batch Texts: ('String 4', 'String 5', 'String 6')
Batch Entities: (['F', 'F', 'G', 'H'], ['G', 'F', 'G', 'H'], ['H', 'F', 'G', 'H'])
Batch Texts: ('String 7', 'String 8')
Batch Entities: (['I', 'F', 'G', 'H'], ['J', 'F', 'G', 'H'])


In [None]:
class ExSet(Dataset):
    """Dataset over ``(img, text, entities)`` records that yields only the text part.

    The image element of each record is unpacked but not returned.
    """

    def __init__(self, data):
        self.data = data

    def __len__(self):
        """Number of records in the wrapped sequence."""
        record_count = len(self.data)
        return record_count

    def __getitem__(self, index):
        """Return ``(text, entities)`` for the record at ``index``."""
        record = self.data[index]
        _img, text, entities = record
        return (text, entities)

# NOTE: same name and behaviour as the custom_collate defined in the earlier
# CustomDataset demo cell; running this cell simply rebinds the name.
def custom_collate(batch):
    """Regroup ``(text, entities)`` samples into a tuple of texts and a tuple of entity lists."""
    transposed = zip(*batch)
    texts, entities = transposed
    return (texts, entities)

In [None]:
# NOTE(review): train_data / val_data / test_data are assigned by the
# train_test_split cell that appears after this one, so on a fresh kernel that
# cell has to be executed first.
train_set, val_set, test_set = (
    ExSet(split) for split in (train_data, val_data, test_data)
)

In [None]:
# Split `records` into train / validation / test partitions (70 / 15 / 15).
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15
SPLIT_SEED = 42  # fixed seed so the partition is identical after a kernel restart

assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, "split ratios must sum to 1"

# Use the explicit sum instead of `1 - train_ratio`: the subtraction evaluates
# to 0.30000000000000004, which can shift the rounded hold-out size by one sample.
holdout_ratio = val_ratio + test_ratio

train_data, temp_data = train_test_split(
    records, test_size=holdout_ratio, random_state=SPLIT_SEED
)
# The second split divides the hold-out part between validation and test.
val_data, test_data = train_test_split(
    temp_data, test_size=test_ratio / holdout_ratio, random_state=SPLIT_SEED
)
print("Training set len:", len(train_data))
print("Validation set len:", len(val_data))
print("Test set len:", len(test_data))

In [None]:
# Drop the notebook-level `records` binding if it exists, then run a
# garbage-collection pass. The splits keep their own references to the
# individual records, so only the outer container becomes collectable.
globals().pop('records', None)
gc.collect()

In [None]:
# Character vocabulary: lowercase, uppercase, then the digit/punctuation keys.
# A character's position in this list is its class index.
char_vocab = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
              'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
              '`', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '-', '=', '~', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '_', '+',
              '[', ']', '\\', ';', '\'', ',', '.', '/', '{', '}', '|', ':', '\"', '<', '>', '?']

# A repeated entry would make one index unreachable from char_to_index and break
# the index <-> character round trip ('?' used to be listed twice in place of '/').
assert len(set(char_vocab)) == len(char_vocab), "char_vocab entries must be unique"

def index_to_char(i):
    """Return the vocabulary character stored at position ``i``.

    Follows normal list indexing: raises ``IndexError`` when ``i`` is out of range.
    """
    return char_vocab[i]

def char_to_index(c):
    """Return the position of character ``c`` in ``char_vocab``.

    Raises ``ValueError`` when ``c`` is not part of the vocabulary.
    """
    return char_vocab.index(c)


In [5]:
import torchtext.legacy

ModuleNotFoundError: No module named 'torchtext.legacy'

In [6]:
import torchtext

In [7]:
from torchtext.legacy import data

ModuleNotFoundError: No module named 'torchtext.legacy'

In [None]:
train_loader = DataLoader(
    dataset=train_set,
    batch_size=32,
    shuffle=True,
    collate_fn=custom_collate,
)

# Smoke check: pull a single batch and report how many texts / entity lists it holds.
for texts, ents in train_loader:
    print(len(texts), len(ents))
    break