shape of image:

```
SCREEN_WIDTH = 450
SCREEN_HEIGHT = 200
```


In [33]:
SCREEN_WIDTH = 450
SCREEN_HEIGHT = 200

CLASStorch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)

input: (N, C_in, H_in, W_in)

output: (N, C_out, H_out, W_out)

$$H_{out} = \frac{H_{in} + 2 \times padding[0] - dilation[0] \times (kernel\_size[0] -1) -1}{stride[0]}+1 $$

In [34]:
import torch
from torch import nn

class CNN(nn.Module):
    def __init__(self, out_channels=6, kernel_size=1, stride=1, padding=0, normalization=False):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels=3, out_channels=out_channels, 
            kernel_size=kernel_size, stride=stride, padding=padding
        )
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(135000, 5) 

    def forward(self, x):
        print("original shape:", x.shape)

        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        print("after C shape:", x.shape)
        x = x.view(x.size(0), -1)
        print("after view:", x.shape)
        x = self.fc1(x)
        return x


In [35]:
import torch.optim as optim
from PIL import Image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

Read Image

In [36]:
from torchvision import transforms
transform_norm = transforms.Compose([
    transforms.ToTensor(),
])

image_path = "../converttext/imgdataset/5AMPLETyponoised.jpg"
img = Image.open(image_path)
dog = transform_norm(img)
# dog = normalize(dog)
dog = torch.unsqueeze(dog, 0)
output = model(dog.to(device))
print(output)

original shape: torch.Size([1, 3, 200, 450])
after C shape: torch.Size([1, 6, 100, 225])
after view: torch.Size([1, 135000])
tensor([[ 0.3846, -0.1179, -0.1451, -0.5415,  0.4295]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


Tokenizer

In [37]:
from tokenizers import ByteLevelBPETokenizer
tokenizer = ByteLevelBPETokenizer()
paths = ["../dataset/ted_dev_en-de.raw.en.txt"]
tokenizer.train(files=paths, vocab_size=52_000, min_frequency=1, special_tokens=[])

In [44]:
tokenizer.token_to_id("so")

649

In [46]:
tokenizer.get_vocab_size()

8681

In [92]:
tokenizer.get_vocab()

{'ers': 492,
 'Ġlectured': 8347,
 'Ġpersonal': 2707,
 'Ġsolves': 6993,
 'ý': 185,
 'Ġentry': 3367,
 'ater': 946,
 'Ġtransmission': 6919,
 'ĠLeap': 7811,
 'Ġof': 286,
 'Ġsol': 1216,
 'dol': 2937,
 'Ġlucky': 3115,
 'ĠLope': 4244,
 'Ġline': 3113,
 'ected': 2063,
 'Ġupdates': 8002,
 'Ġglobe': 7029,
 'Ġmiser': 3808,
 'ĠU': 627,
 'Ġslowing': 7061,
 'Ġrestroom': 7199,
 'ptive': 6614,
 'Ġweekday': 3456,
 'Ġincubators': 7421,
 'Ġweeke': 5754,
 'Looks': 7361,
 'Ġperiods': 7313,
 'ĠToday': 4123,
 'Ġaddressing': 7512,
 'Ġunequal': 8494,
 'Ġeasily': 3513,
 'augh': 523,
 'ĠBruno': 7344,
 'Ġ"': 376,
 'ĠDarwinian': 7615,
 'Sam': 4937,
 'ided': 1412,
 'Ġprotoc': 7039,
 'Ġbow': 5414,
 'Ġusers': 2666,
 'hetics': 3670,
 'Ġhaving': 2012,
 'arked': 6779,
 'reen': 1327,
 'Ġ60': 2394,
 'Ġpotential': 2136,
 'Ġeverything': 1515,
 'Ġprodu': 1504,
 'ental': 3145,
 'lie': 876,
 'Ġzones': 6868,
 'c': 66,
 'ĠAngeles': 2134,
 'Ġpositions': 6846,
 'Ġhorizontally': 8662,
 'ĠBecause': 3285,
 'safe': 4453,
 'Ġconstraints

In [84]:
from collections import defaultdict
from nltk.tokenize import word_tokenize

id_to_word_list = []                    # list: id   -> word
word_num_dict   = defaultdict(int)      # dict: word -> num(word)
word_to_id_dict = defaultdict(int)      # dict: word -> id
id_to_word_dict = defaultdict(str)      # dict: id   -> word
with open("../dataset/ted_dev_en-de.raw.en.txt", 'r', encoding='utf-8') as f:
    for line in f:
        if line != '\n':
            words = line.strip()
            tokens = word_tokenize(words)
            for token in tokens:
                if token.isalpha():
                    if token not in id_to_word_list:
                        id_to_word_list.append(token)
                    word_num_dict[token] +=1

for id, word in enumerate(id_to_word_list):
    word_to_id_dict[word] = id
    id_to_word_dict[id]   = word

In [85]:
print("length of word_num_dict:", len(word_num_dict.keys()))
print("length of id_to_word_dict:", len(id_to_word_dict.keys()))
print("length of word_id_dict:", len(word_to_id_dict.keys()))
print(f"index: [5], word in id_to_word_dict: [{id_to_word_dict[5]}], id in word_id_dict: [{word_to_id_dict[id_to_word_dict[5]]}]")


length of word_num_dict: 4572
length of id_to_word_dict: 4572
length of word_id_dict: 4572
index: [5], word in id_to_word_dict: [defines], id in word_id_dict: [5]


In [86]:
# save dicts

import json

with open('word_num_dict.json', 'w') as fp:
    json.dump(word_num_dict, fp)
with open('word_to_id_dict.json', 'w') as fp:
    json.dump(word_to_id_dict, fp)
with open('id_to_word_dict.json', 'w') as fp:
    json.dump(id_to_word_dict, fp)

In [91]:
with open('word_num_dict.json', 'r') as fp:
    word_num_dict_test = json.load(fp)
with open('word_to_id_dict.json', 'r') as fp:
    word_to_id_dict_test = json.load(fp)
with open('id_to_word_dict.json', 'r') as fp:
    id_to_word_dict_test = json.load(fp)
    
print("test load: length of word_num_dict:", len(word_num_dict_test.keys()))
print("test load: length of id_to_word_dict:", len(word_to_id_dict_test.keys()))
print("test load: length of word_id_dict:", len(word_to_id_dict_test.keys()))
print('note index needs to use str(index)')
print(f"test load: index: [5], word in id_to_word_dict: [{id_to_word_dict_test[str(5)]}], id in word_id_dict: [{word_to_id_dict_test[id_to_word_dict_test['5']]}]")


test load: length of word_num_dict: 4572
test load: length of id_to_word_dict: 4572
test load: length of word_id_dict: 4572
note index needs to use str(index)
test load: index: [5], word in id_to_word_dict: [defines], id in word_id_dict: [5]


Train

In [48]:
# !pip install nltk

Collecting nltk


[notice] A new release of pip available: 22.3.1 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip



  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
     ---------------------------------------- 1.5/1.5 MB 6.4 MB/s eta 0:00:00
Collecting click
  Downloading click-8.1.6-py3-none-any.whl (97 kB)
     ---------------------------------------- 97.9/97.9 kB 5.8 MB/s eta 0:00:00
Installing collected packages: click, nltk
Successfully installed click-8.1.6 nltk-3.8.1


dataset / dataloader

pipeline