In [4]:
import torch 
import torch.nn as nn

class PatchReprogram(nn.Module):
    """Project a time series onto a fixed-size patch vector and standardize it.

    A linear layer maps the trailing ``time_steps`` axis to ``patch_size``
    features. Each resulting patch vector is then normalized over its own
    ``patch_size`` entries by a non-affine instance norm.
    """

    def __init__(self, time_steps, patch_size):
        """
        Args:
            time_steps: size of the trailing axis of the input series.
            patch_size: number of features in each output patch vector.
        """
        super().__init__()
        self.linear = nn.Linear(time_steps, patch_size)
        # forward() presents every patch vector as a single-channel instance of
        # shape (1, patch_size), so the norm is declared with one channel.
        # Declaring it with `patch_size` channels made PyTorch warn about a
        # num_features mismatch on every call. The layer is non-affine and
        # tracks no running stats, so it owns no parameters or buffers.
        self.norm = nn.InstanceNorm1d(1)

    def forward(self, time_series_data):
        """
        Input : (batch_size, time_steps)
        Output : (batch_size, patch_size)

        Any number of leading dimensions (including none) is accepted; they
        are carried through unchanged and only the trailing axis is projected
        and normalized.
        """
        patches = self.linear(time_series_data)
        # InstanceNorm1d normalizes over L for inputs shaped (N, C, L). Collapse
        # all leading dims into N and use a single channel so that each patch
        # vector is standardized independently over its patch_size entries.
        instance_count = patches.shape[:-1].numel()
        instances = patches.reshape(instance_count, 1, patches.shape[-1])
        return self.norm(instances).reshape(patches.shape)
# Demo: push one synthetic series through the patcher and inspect the result.
if __name__ == "__main__":
    # Fake stock data: a single series spanning 252 trading days.
    data = torch.randn((1, 252))
    patcher = PatchReprogram(252, 16)
    patch_embeddings = patcher(data)
    # Prints the tensor itself followed by its shape.
    print("Patch Embeddings Shape:", patch_embeddings, patch_embeddings.size())

Patch Embeddings Shape: tensor([[ 0.0775, -1.9374,  0.9095,  0.9679, -0.4470, -2.1310,  1.3363, -0.1088,
          0.9213, -0.6648,  1.3608, -0.3220, -0.2833, -0.4444,  0.1376,  0.6279]],
       grad_fn=<SqueezeBackward1>) torch.Size([1, 16])




In [7]:
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer

class LLMEmbedder(nn.Module):
    """Wrap a pretrained tokenizer/model pair and expose token-level hidden states."""

    def __init__(self, model_name="microsoft/phi-2"):
        """
        Args:
            model_name: checkpoint identifier handed to ``from_pretrained``
                for both the tokenizer and the model.
        """
        super().__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.hidden_size = self.model.config.hidden_size

    def embed_data(self, data):
        """
        Converts input data into LLM embeddings.

        Args:
            data: text passed straight to the tokenizer (the notebook example
                passes a single string).

        output - (batch_size, seq_len, hidden_size)
        """
        # Fall back to EOS for padding only when the tokenizer has no pad
        # token. Overwriting unconditionally would replace a valid pad token
        # (with None, if the tokenizer defines no EOS token).
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        inputs = self.tokenizer(data, return_tensors="pt", padding=True, truncation=True)
        # Tokenizer output is created on CPU; move it to wherever the model
        # lives so the call also works after the module has been moved.
        inputs = inputs.to(self.model.device)
        # NOTE(review): gradients are tracked through the backbone here; wrap
        # the call in torch.no_grad() if the LLM is meant to stay frozen.
        outputs = self.model(**inputs)
        return outputs.last_hidden_state
if __name__ == "__main__":
    # Demo: embed one prompt with the default checkpoint and report the shape.
    embedder = LLMEmbedder()
    text_embedding = embedder.embed_data(data="Predict next stock price movement")
    print("Text Embedding Shape:", text_embedding.size())

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.03s/it]


Text Embedding Shape: torch.Size([1, 6, 2560])


In [8]:
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer

class LLMEmbedder(nn.Module):
    """Wrap a pretrained tokenizer/model pair and project its hidden states to ``target_dim``."""

    def __init__(self, model_name="microsoft/phi-2", target_dim=768):
        """
        Args:
            model_name: checkpoint identifier handed to ``from_pretrained``
                for both the tokenizer and the model.
            target_dim: output width of the linear projection applied to the
                model's hidden states.
        """
        super().__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.hidden_size = self.model.config.hidden_size
        self.projection = nn.Linear(self.hidden_size, target_dim)

    def embed_data(self, data):
        """
        Converts input data into LLM embeddings and projects them to ``target_dim``.

        Args:
            data: text passed straight to the tokenizer (the notebook example
                passes a single string).

        output - (batch_size, seq_len, target_dim)
        """
        # Fall back to EOS for padding only when the tokenizer has no pad
        # token. Overwriting unconditionally would replace a valid pad token
        # (with None, if the tokenizer defines no EOS token).
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        inputs = self.tokenizer(data, return_tensors="pt", padding=True, truncation=True)
        # Tokenizer output is created on CPU; move it to wherever the model
        # lives so the call also works after the module has been moved.
        inputs = inputs.to(self.model.device)
        outputs = self.model(**inputs).last_hidden_state

        # Map hidden_size -> target_dim along the last axis.
        reduced_embeddings = self.projection(outputs)
        return reduced_embeddings
if __name__ == "__main__":
    # Demo: embed one prompt using the default checkpoint and projection width,
    # then report the shape of the projected embeddings.
    embedder = LLMEmbedder()
    text_embedding = embedder.embed_data(data="Predict next stock price movement")
    print("Text Embedding Shape:", text_embedding.size())

Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.22it/s]


ValueError: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` or add a new pad token via `tokenizer.add_special_tokens({'pad_token': '[PAD]'})`.