In [1]:
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch import nn
from torch.utils.data import ConcatDataset, DataLoader, Dataset
import numpy as np
from nptyping import Float32, NDArray, Number, Shape, UInt
from transformers import ViTModel
import pytorch_lightning as pl

import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.insert(0, module_path)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class SignedDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        # [n_video, nb_frames, 3, 320, 240]
        self.Y = Y
        # [n_video, nb_signes, 1]

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.Y[i]

In [90]:
nb_classes = 1999

class GRU_Translator(pl.LightningModule):
	def __init__(
		self,
		H_input_size: int = 760,
		H_output_size: int = 100,
		num_layers: int = 1,
		dropout: int = 0,
		corpus: str = "/usr/share/dict/words",
	):
		super().__init__()
		self.save_hyperparameters()
		self.vocabulary_size = nb_classes
		self.layer_gru = nn.GRU(
			input_size=self.hparams.H_input_size,
			hidden_size=self.hparams.H_output_size,
			num_layers=self.hparams.num_layers,
			batch_first=True,
			dropout=self.hparams.dropout,
		)
		# print(f"{self.hparams.H_input_size= }\n{self.hparams.H_output_size}")

		self.layer_1_dense = nn.Linear(self.hparams.H_output_size, 400)
		self.layer_1_relu = nn.ReLU()
		self.layer_2_dense = nn.Linear(400, self.vocabulary_size)
		self.layer_2_relu = nn.ReLU()
		self.softmax = nn.Softmax(dim=2)
			# x = torch.squeeze(x)

	def forward(self, X):
		# print(f"gru before: {X.shape = }")
		X, hidden = self.layer_gru(X)
		# print(f"gru after: {X.shape = }")
		X = self.layer_1_dense(X)
		X = self.layer_1_relu(X)
		# print(f"gru: {X.shape = }")
		X = self.layer_2_dense(X)
		X = self.layer_2_relu(X)
		# print(f"gru: {X.shape = }")
		X = self.softmax(X)
		# print(f"gru end: {X.shape= }")
		# print(f"gru: {X.shape = }")
		return X

class BaseSquareNet(pl.LightningModule):
	def __init__(
		self,
		corpus: str = "/usr/share/dict/words",
		batch_size: int = 1,
		nb_batch: int = 1,
		sequence_size: int = 16,
	):
		super().__init__()
		self.save_hyperparameters()

		self.batch_size = batch_size
		self.nb_batch = nb_batch
		# self.vocabulary_size3= len(np.array(open(corpus).read().splitlines()))
		self.vocabulary_size = nb_classes
		self.recurrent_translator = GRU_Translator(
			H_input_size=760,
			H_output_size=75,
			num_layers=1,
			dropout=0,
			corpus=corpus,
		)

	def forward(
		self, x: NDArray[Shape["* batch, 224, 224, 3"], Float32]
	) -> NDArray[Shape["* batch, * vocab size"], Float32]:
		x = self.recurrent_translator(x)
		# print(f"Bsqr: {x.shape = }")
		return x


# NB_BATCH = BATCH
# BATCH_SIZE = SEQUENCE
nb_batch = 2
batch_size = 2

x = torch.rand((nb_batch, batch_size, 760))
y = torch.randint(0, nb_classes, (nb_batch, batch_size))

print(y.shape)
# Batch, Sequence, Hin
corpus="/home/dolmalin/Documents/work/42ai/Hand2Text/data/H2T/wlasl_words"


model = BaseSquareNet(corpus=corpus, nb_batch=nb_batch, batch_size=batch_size)
dataset = SignedDataset(x, y)

dataloader = DataLoader(dataset=dataset, batch_size=batch_size)
learning_rate = 1e-2
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

def train(train_loader, model, loss_fn, optmizer):
	loss = 10
	idx = 0
	while loss > 0:
		for batch_idx, (X, y) in enumerate(train_loader):
			pred = model(X)
			pred = pred.permute(1, 2, 0)
			# print(pred.shape)
			loss = loss_fn(pred, y)
			loss.backward()
			optimizer.step()
			optimizer.zero_grad()
			if batch_idx % 100 == 0:
				print(f'{idx} loss: {loss}\r', end='')
			idx += 1

torch.Size([2, 2])


In [91]:
train(dataloader, model, loss_fn, optimizer)

558868 loss: 6.8511991500854495

KeyboardInterrupt: 