In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [3]:
from encdec_model import EncDec, Encoder, Decoder
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms

5. Predictions

In [4]:
# Restore the trained model object from its backup file. The file holds a whole
# module rather than a bare state dict, since .state_dict() is called on the result.
checkpoint_path = './model_backups/mkc_epoch3_colab.pth'
model = torch.load(checkpoint_path)
# Show the restored parameter tensors as a sanity check.
model.state_dict()

OrderedDict([('encoder.conv1.weight',
              tensor([[[[-0.0923,  0.0506,  0.0712,  0.0140,  0.0084],
                        [ 0.0562,  0.0119,  0.0960, -0.0736,  0.0565],
                        [ 0.0280, -0.0964, -0.0347, -0.0631,  0.0356],
                        [-0.0089,  0.0848, -0.0008, -0.0531, -0.0102],
                        [-0.1116,  0.0494, -0.0523, -0.0082,  0.0074]],
              
                       [[-0.0019, -0.0537,  0.1065,  0.0902, -0.0416],
                        [ 0.0736,  0.0435, -0.0699, -0.0180, -0.0895],
                        [-0.0572,  0.0141, -0.0940,  0.0714, -0.0074],
                        [ 0.0607, -0.0705,  0.1090,  0.0356,  0.0926],
                        [-0.0439,  0.0098, -0.0114, -0.0851, -0.0160]],
              
                       [[ 0.0316,  0.0029,  0.1147, -0.0073,  0.0065],
                        [-0.0462, -0.0384, -0.0217, -0.0228,  0.1084],
                        [ 0.0400,  0.0810, -0.0033,  0.0059, -0.0789],
       

In [5]:
# Table for the synthetic test split; its 'image' column is used later to
# label the prediction rows.
test_csv_path = './col_774_A4_2023/SyntheticData/test.csv'
test_csv = pd.read_csv(test_csv_path)
test_csv

Unnamed: 0,image,formula
0,1cb0b785da.png,$ \mathcal { F } _ { \mathrm { i n } } ^ { ( 0...
1,6f2229183a.png,"$ d ( l _ { 0 } + 1 , k _ { 1 } ; l _ { 0 } , ..."
2,49e5037a24.png,"$ \chi _ { k , l } ( q , \theta ) ~ \chi _ { 1..."
3,7f06160259.png,$ ( F _ { \mu \nu } ) = \left( \begin{array} {...
4,6cd2e6c9cb.png,"$ B ^ { 1 \alpha , 2 \beta } \left( \partial _..."
...,...,...
8902,f7625caeb5.png,$ d s ^ { 2 } = e ^ { - 2 k | y | } \left[ - \...
8903,1e71603fa6.png,$ { \cal U } _ { \hat { U } \hat { V } } = 0 . $
8904,11704a200a.png,$ ( C _ { n } \: { \cal { B } } \: C _ { n - 1...
8905,4c3927856d.png,$ d s ^ { 2 } = - d t ^ { 2 } + H _ { 5 } ^ { ...


In [6]:
# Image preprocessing applied by default to every dataset item.
_preprocessing_steps = [
    transforms.Resize((224, 224)),  # force a fixed 224x224 spatial size
    transforms.ToTensor(),          # PIL image -> tensor
    transforms.Normalize(0, 1),     # mean 0, std 1
]
tf_resize_normalize = transforms.Compose(_preprocessing_steps)

In [62]:
class TestDataset(Dataset):
    """Image-only dataset used for inference over one split CSV.

    Each item is the image named in the first column of the CSV row, loaded
    from disk and (optionally) transformed. No label is returned.

    Args:
        csv_file: Path to the split CSV; column 0 must hold the image file name.
        directory: Dataset folder under ``./col_774_A4_2023``. ``'SyntheticData'``
            reads from ``<directory>/images/``; any other value reads from
            ``<directory>/images/train/``.
        transform: Callable applied to the opened PIL image. When falsy, the
            PIL image is returned untouched.
    """

    def __init__(self, csv_file, directory='SyntheticData', transform=tf_resize_normalize):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.dir = directory

    def __len__(self):
        """Number of rows (images) in the split CSV."""
        return len(self.data)

    def _image_path(self, img_name):
        """Build the on-disk path of ``img_name`` for this dataset folder."""
        if self.dir == 'SyntheticData':
            return f"./col_774_A4_2023/{self.dir}/images/{img_name}"
        return f"./col_774_A4_2023/{self.dir}/images/train/{img_name}"

    def __getitem__(self, idx):
        """Return the (transformed) image for row ``idx``."""
        img_name = self.data.iloc[idx, 0]
        image = Image.open(self._image_path(img_name))

        # Previously `image_tensor` was left unbound when no transform was
        # given, so the return raised UnboundLocalError. Hand back the raw
        # image instead.
        if not self.transform:
            return image

        image_tensor = self.transform(image)
        if self.dir != 'SyntheticData':
            # Tile the channel dimension three times for non-synthetic images
            # (presumably they are single-channel and the model expects three
            # channels -- confirm against the data).
            image_tensor = torch.cat((image_tensor, image_tensor, image_tensor), dim=0)

        return image_tensor

In [None]:
# Inference datasets for the synthetic test and validation splits
# (default directory and default transform).
test_data, val_data = (
    TestDataset(split_csv)
    for split_csv in (
        "./col_774_A4_2023/SyntheticData/test.csv",
        "./col_774_A4_2023/SyntheticData/val.csv",
    )
)

In [29]:
def predict(model: EncDec, max_len=629, dir_data=test_data, device='cuda', batch_size=100):
    """Greedy-decode a symbol sequence for every image in ``dir_data``.

    The encoder output of each batch is fed to the decoder one step at a time,
    starting from the ``'<SOF>'`` token and always feeding back the arg-max
    token of the previous step.

    Args:
        model: Object exposing ``encoder``, ``decoder``, ``word_to_index`` and
            ``index_to_word``. It is switched to eval mode.
        max_len: Number of decoding steps; every returned sequence has exactly
            this many symbols (there is no early stop on an end token).
        dir_data: Dataset yielding image tensors.
        device: Device the image batches and start tokens are moved to.
        batch_size: Batch size of the internal ``DataLoader``.

    Returns:
        list[str]: One space-joined symbol string per dataset item, in dataset
        order.
    """
    # shuffle=False is essential: callers pair the returned list positionally
    # with the image names read from the CSV, so dataset order must be kept.
    loader = DataLoader(dir_data, batch_size=batch_size, shuffle=False)

    final_latex = []
    model.eval()

    with torch.no_grad():
        for images in loader:
            images = images.to(device)
            context = model.encoder(images)

            # Actual number of samples in this batch; the last batch may be
            # smaller than batch_size.
            n_samples = context.shape[0]
            hidden = None

            input_token = torch.tensor(
                [[model.word_to_index['<SOF>']]] * n_samples).to(device)

            # Collect per-step predictions and concatenate once, instead of
            # re-concatenating the growing tensor on every step.
            step_tokens = []
            for _step in range(max_len):
                output, hidden = model.decoder(context, input_token, hidden)
                predicted_token = output.argmax(dim=2)
                step_tokens.append(predicted_token)
                input_token = predicted_token

            if step_tokens:
                # One device-to-host transfer per batch rather than one per token.
                token_rows = torch.cat(step_tokens, dim=1).tolist()
            else:
                # max_len <= 0: nothing decoded, yield empty strings.
                token_rows = [[] for _ in range(n_samples)]

            for row in token_rows:
                final_latex.append(
                    ' '.join(model.index_to_word[token_id] for token_id in row))

    return final_latex

In [14]:
# Decode the test split with predict()'s defaults (dir_data=test_data, max_len=629).
test_predict = predict(model)

In [25]:
# Pair every test image name with its decoded formula. Rows are matched by
# position, so test_predict must be in the same order as test_csv.
test_pred_df = test_csv[['image']].assign(formula=test_predict)

In [28]:
# Write the test predictions as CSV text without the index column
# (note the output file name carries no .csv extension).
test_pred_df.to_csv('SyntheticData_test_pred', index=False)

In [38]:
def generate_csv(model, dir_list=['SyntheticData/val','HandwrittenData/val_hw']):
    """Decode each listed split and write its predictions to a CSV file.

    Args:
        model: Model passed through to ``predict``.
        dir_list: Entries of the form ``"<folder>/<split>"``; the split CSV is
            read from ``./col_774_A4_2023/<folder>/<split>.csv``.

    Side effects:
        For every entry, writes ``<folder>_<split>_pred`` in the working
        directory with the columns ``image`` and ``formula``, and prints the
        dataset size and the finished file name.
    """
    for dirt in dir_list:
        # The folder part selects the image location / channel handling inside
        # TestDataset; without it handwritten splits would be read from the
        # synthetic image directory.
        folder = dirt.split('/')[0]
        data = TestDataset(f"./col_774_A4_2023/{dirt}.csv", directory=folder)
        print(len(data))
        # max_len=200 because decoded formulas rarely reach the maximum length
        predict_list = predict(model, dir_data=data, max_len=200)

        # Use this split's own image names and predictions (not the globals
        # left over from the test-split cells).
        # NOTE(review): rows are paired positionally, which is only valid if
        # predict() iterates the dataset in order (no shuffling) -- confirm.
        pred_df = pd.DataFrame({
            'image': data.data.iloc[:, 0],
            'formula': predict_list,
        })

        file_name = dirt.replace('/', '_') + '_pred'

        pred_df.to_csv(file_name, index=False)
        print(file_name, 'done')

In [None]:
# Produce prediction files for the handwritten and the synthetic validation splits.
generate_csv(model, dir_list=['HandwrittenData/val_hw', 'SyntheticData/val'])

In [45]:
# Tables for the synthetic and handwritten validation splits; their 'image'
# columns label the prediction rows written below.
val_csv, val_hw_csv = (
    pd.read_csv(csv_path)
    for csv_path in (
        './col_774_A4_2023/SyntheticData/val.csv',
        './col_774_A4_2023/HandwrittenData/val_hw.csv',
    )
)

In [47]:
# Decode the synthetic validation split with 200 decoding steps per image,
# attach the predictions to the image names by position, and save them.
val_predict = predict(model, max_len=200, dir_data=val_data)
val_pred_df = val_csv[['image']].assign(formula=val_predict)
val_pred_df.to_csv('SyntheticData_val_pred', index=False)

In [63]:
# Handwritten validation split. A directory other than 'SyntheticData' makes
# TestDataset read from images/train/ and tile the channel dimension three times.
sus_data = TestDataset("./col_774_A4_2023/HandwrittenData/val_hw.csv", directory='HandwrittenData')

In [64]:
# Decode the handwritten validation split with 200 decoding steps per image,
# attach the predictions to the image names by position, and save them.
# Note: this rebinds val_predict / val_pred_df from the synthetic-validation cell.
val_predict = predict(model, max_len=200, dir_data=sus_data)
val_pred_df = val_hw_csv[['image']].assign(formula=val_predict)
val_pred_df.to_csv('HandwrittenData_val_pred', index=False)

-- Old code --

In [58]:
# Scratch check: shape of the first handwritten validation image after preprocessing.
imgaa=val_hw_csv['image'][0]
imgr = Image.open(f"./col_774_A4_2023/HandwrittenData/images/train/{imgaa}")
imgr_tensor = tf_resize_normalize(imgr)
# torch.cat() with no arguments raises TypeError; the recorded output of this
# cell is a shape, so display the shape instead.
imgr_tensor.shape

torch.Size([1, 224, 224])

In [59]:
# Scratch check: shape of the first synthetic validation image after preprocessing.
imgaa = val_csv.loc[0, 'image']
imgr = Image.open(f"./col_774_A4_2023/SyntheticData/images/{imgaa}")
imgr_tensor = tf_resize_normalize(imgr)
imgr_tensor.shape

torch.Size([3, 224, 224])

In [61]:
# Scratch check: stacking the tensor three times along dim 0 triples its first
# dimension. Re-running this cell keeps growing imgr_tensor.
imgr_tensor = torch.cat([imgr_tensor] * 3, dim=0)
imgr_tensor.shape

torch.Size([9, 224, 224])

In [32]:
# Spot-check the vocabulary: look up the symbol stored at index 372.
model.index_to_word[372]

'{'

In [62]:
# Scratch check: tuple indexing and chained indexing address the same element,
# and int() turns the zero-dim tensor into a plain Python integer.
row_of_twos = [2] * 63
axew = torch.tensor([row_of_twos] * 10)
print(axew[4, 30], int(axew[4][30]))

tensor(2) 2


In [15]:
def predict_old(model: EncDec, dir_folder='SyntheticData', dir_data='test', device='cuda', batch_size=100):
    """Legacy greedy decoder kept for reference; ``predict`` is the current version.

    Args:
        model: Object exposing ``encoder``, ``decoder``, ``word_to_index``,
            ``index_to_word`` and ``out_size`` (the number of decoding steps).
            It is switched to eval mode.
        dir_folder: Unused.
        dir_data: ``'test'`` selects the global ``test_data``; any other value
            selects the global ``val_data``.
        device: Device the image batches and start tokens are moved to.
        batch_size: Batch size of the internal ``DataLoader``.

    Returns:
        list[list[str]]: One inner list per (batch, decoding step) pair, in
        that order; each inner list holds the symbol predicted at that step
        for every image of the batch.
    """
    dataset = test_data if dir_data == 'test' else val_data
    # Keep dataset order so results can be related back to the CSV rows.
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    final_latex = []
    model.eval()

    with torch.no_grad():
        for images in loader:
            images = images.to(device)
            context_vectors = model.encoder(images)
            hidden = None

            # Size everything by the real batch: the final batch can hold
            # fewer than batch_size images, which the fixed-size version
            # could not handle.
            n_samples = images.shape[0]
            input_token = torch.tensor(
                [[model.word_to_index['<SOF>']]] * n_samples).to(device)

            predicted_tokens = []
            for _step in range(model.out_size):
                output, hidden = model.decoder(
                    context_vectors, input_token, hidden)

                predicted_token = output.argmax(dim=2)
                predicted_tokens.append(predicted_token)

                input_token = predicted_token

            for step_tokens in predicted_tokens:
                final_latex.append([
                    model.index_to_word[step_tokens[sample, 0].item()]
                    for sample in range(n_samples)
                ])

    return final_latex