In [29]:
pip install --upgrade transformers

Note: you may need to restart the kernel to use updated packages.
Defaulting to user installation because normal site-packages is not writeable


In [30]:
!pip install sentencepiece

Defaulting to user installation because normal site-packages is not writeable


In [31]:
pip show sentencepiece

Name: sentencepieceNote: you may need to restart the kernel to use updated packages.

Version: 0.2.0
Summary: SentencePiece python wrapper
Home-page: https://github.com/google/sentencepiece
Author: Taku Kudo
Author-email: taku@google.com
License: Apache
Location: C:\Users\jayan\AppData\Roaming\Python\Python311\site-packages
Requires: 
Required-by: 


In [32]:
import sentencepiece
print("SentencePiece imported successfully.")

SentencePiece imported successfully.


In [25]:
from transformers import T5Tokenizer
try:
    tokenizer=T5Tokenizer.from_pretrained('t5-small')
    print("T5Tokenizer loaded successfully.")
except Exception as e:
    print(f"Error: {e}")

T5Tokenizer loaded successfully.


In [2]:
#How2sign  dataset
import json
import os.path

import numpy as np #importing necessary libraries
import pandas as pd


class How2signDataset:
    def __init__(self, json_files, csv_file, seq_len=183, time_len=512):
        self.seq_len = seq_len
        self.time_len = time_len
        self.json_files = json_files
        self.csv_file = csv_file

        self.sentence_dict = self.load_y()

    def get_x(self, x_path):
        # Load the JSON data
        with open(x_path, 'r') as f:
            data = json.load(f)

        # Extract the hand_pose_face keypoints
        hand_pose_faces = [person['hand_pose_face'] for person in data['people']]

        hand_pose_faces = np.array(hand_pose_faces)
        hand_pose_faces = hand_pose_faces.reshape(1, -1, self.seq_len)

        # Create an array to store the padded data
        x = np.zeros((1, self.time_len, self.seq_len))

        # Fill the padded array with the actual data
        seq_length = len(hand_pose_faces[0])
        x[:, :seq_length, :] = hand_pose_faces[:, :seq_length, :]

        return x

    def load_y(self):
        data = pd.read_csv(self.csv_file, delimiter='\t', on_bad_lines='skip')
        df = data[['SENTENCE_NAME', 'SENTENCE']]
        sentence_dict = pd.Series(df.SENTENCE.values, index=df.SENTENCE_NAME).to_dict()

        return sentence_dict

    def get_y(self, x_base_path):
        y = self.sentence_dict.get(x_base_path, "0")

        return y

    def how2sign_keypoints_sentence(self):
        # Load the data from multiple files
        x = [self.get_x(json_file) for json_file in self.json_files]
        x = np.concatenate(x, axis=0)

        json_files_base = [json_file.split(".")[0] for json_file in self.json_files]
        print(json_files_base)
        y = [self.get_y(json_file_base) for json_file_base in json_files_base]
        y = np.array(y)

        # Concatenate the data from the files
        return x, y

In [3]:
json_files = ["CO6qyvvglAE_18-5-rgb_front.json"]
csv_file = "how2sign_realigned_val.csv"

# json_files = find_files("data/how2sign/realigned_val", pattern='**/*.json', interval=1)
dataset = How2signDataset(json_files=json_files, csv_file=csv_file)
x, y = dataset.how2sign_keypoints_sentence()

print(x)
print(y) 
print('-'*100)
print(x.shape)
print(y.shape)

['CO6qyvvglAE_18-5-rgb_front']
[[[0.64002734 0.63365875 0.639195   ... 0.5492125  0.30881625 0.887098  ]
  [0.64035156 0.62237875 0.580242   ... 0.55052266 0.30683125 0.922378  ]
  [0.63711953 0.59722625 0.608995   ... 0.55191484 0.30503375 0.947329  ]
  ...
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]]]
['Now a great feature of the Denon CD player is, if you decide this is the track you want, instead of hitting pause, which just pauses it, or hitting stop, which actually would bring you back to track one, you can hit, go find the track you want again, track three in this case.']
----------------------------------------------------------------------------------------------------
(1, 512, 183)
(1,)


In [4]:
import torch
import torch.nn as nn

class LinearProjectionWithConv(nn.Module):
    def __init__(self, embed_dim):
        super(LinearProjectionWithConv, self).__init__()
        # Kernel size is (183, 1) and stride is (183, 1)
        self.conv = nn.Conv2d(in_channels=1, out_channels=embed_dim, kernel_size=(183, 1), stride=(183, 1))
    
    def forward(self, x):
        # x shape: (batch_size, 183, 1024)
        x = x.unsqueeze(1)  # Add channel dimension: (batch_size, 1, 183, 1024)
        x = self.conv(x)  # Apply convolution
        x = x.squeeze(2)  # Remove the channel dimension: (batch_size, embed_dim, new_sequence_length)
        return x


In [13]:
import os
import json
from transformers import T5Tokenizer
from torch.utils.data import Dataset, DataLoader
from glob import glob

import json
import os

def  find_files(directory, pattern='**/*.json'):
    return glob(os.path.join(directory,pattern),recursive=True)

'''# Define paths to your folders
train_folder = "J:\\train_2D_keypoints\\openpose_output\\output"
val_folder = "J:\\val_2D_keypoints\\openpose_output\\output"
test_folder = "J:\\test_2D_keypoints\\openpose_output\\output" '''

'''# Load the datasets
train_data = find_files(train_folder)
val_data = find_files(val_folder)
test_data = find_files(test_folder)'''



# Tokenizer and Dataset Preparation
tokenizer = T5Tokenizer.from_pretrained('t5-small')



'''class How2signDataset(Dataset):
    def __init__(self, texts, tokenizer, max_len):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        inputs = self.tokenizer.encode_plus(
            text,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        return {
            'input_ids': inputs['input_ids'].squeeze(),
            'attention_mask': inputs['attention_mask'].squeeze()
        } '''

# Create datasets and dataloaders
#y data
train_DATA= "J:\\train_2D_keypoints\\openpose_output\\output"
csv_file = "J:\Folders\Downloads\how2sign_realigned_train.csv"
train_data=find_files(train_DATA)
# train_dataset = How2signDataset(train_data,csv_file)
# x_train, y_train = train_data.how2sign_keypoints_sentence()
# print(x_train.shape, y_train.shape)

val_DATA= "J:\\val_2D_keypoints\\openpose_output\\output"
csv_file = "J:\Folders\Downloads\how2sign_realigned_val.csv"
val_data=find_files(val_DATA)
# val_dataset = How2signDataset(val_data, csv_file)
# x_val, y_val = val_data.how2sign_keypoints_sentence()
# print(x_val.shape, y_val.shape)


test_DATA = "J:\\test_2D_keypoints\\openpose_output\\output"
csv_file = "J:\Folders\Downloads\how2sign_realigned_test.csv"
test_data=find_files(test_DATA)
test_dataset = How2signDataset(test_data,csv_file)
x_test, y_test = test_data.how2sign_keypoints_sentence()
print(x_test.shape, y_test.shape)



train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=8)
test_dataloader = DataLoader(test_dataset, batch_size=8)


AttributeError: 'list' object has no attribute 'how2sign_keypoints_sentence'

In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

class T5WithProjection(nn.Module):
    def __init__(self, t5_model_name, embed_dim):
        super(T5WithProjection, self).__init__()
        self.t5 = T5ForConditionalGeneration.from_pretrained('t5-small')
        self.projection = LinearProjectionWithConv(embed_dim)
    
    def forward(self, input_ids, attention_mask=None, labels=None):
         # Apply linear projection
        input_ids = self.projection(outputs.logits)  # Apply projection to logits
        # Forward pass through T5 model
        outputs = self.t5(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
       
        return outputs

# Initialize the model with projection
embed_dim = 1  # Adjust according to your T5 model's hidden size
model = T5WithProjection('t5-small', embed_dim)


In [7]:
import torch
from transformers import AdamW
from tqdm import tqdm
import torch.nn as nn
from transformers import T5ForConditionalGeneration, AdamW, T5Tokenizer
from torch.utils.data import DataLoader, Dataset

# Define the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model and tokenizer
model = T5ForConditionalGeneration.from_pretrained('t5-small')
model.to(device)
tokenizer = T5Tokenizer.from_pretrained('t5-small')

'''# test
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("translate English to German: The house is wonderful.", return_tensors="pt").input_ids
labels = tokenizer("example sentence", return_tensors="pt").input_ids

print(input_ids)
print(labels)'''

# Initialize the optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Define your dataset and dataloader
train_dataset =  How2signDataset(tokenizer,max_len,max_len)  
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)

# Define the training function
def train_epoch(model, dataloader, optimizer, device):
    model.train()
    total_loss = 0
    for batch in dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        logits = outputs.logits
        logits = logits.view(-1, logits.size(-1))
        labels = input_ids.view(-1)
        loss = nn.functional.cross_entropy(logits, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dataloader)

# Train the model
for epoch in range(3):  # Number of epochs
    train_loss = train_epoch(model, train_dataloader, optimizer, device)
    print(f"Epoch {epoch + 1}: Training loss = {train_loss:.4f}")



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.0 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\jayan\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\jayan\AppData\Roaming\Python\Python311\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\jayan\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_lo

NameError: name 'TextDataset' is not defined

In [6]:
# Save the model
model.save_pretrained('t5-with-projection')
tokenizer.save_pretrained('t5-with-projection')

NameError: name 'model' is not defined