In [2]:
import numpy as np
import torch
from torch.utils.data import Dataset
from momentfm import MOMENTPipeline
from transformers import Trainer, TrainingArguments

import gc
from tqdm import trange
import time
import os

  torch.utils._pytree._register_pytree_node(


In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
# Load the pretrained MOMENT checkpoint configured for the embedding task,
# then move it to the GPU in float32.
model = MOMENTPipeline.from_pretrained(
    "AutonLab/MOMENT-1-large",
    model_kwargs={"task_name": "embedding"},
)
model.init()
model.to("cuda").float()
# The module tree contains Dropout(p=0.1) layers. torch.no_grad() does not
# disable them, so switch to eval mode explicitly before extracting embeddings.
# eval() returns the module itself, so the cell still displays the model summary.
model.eval()



MOMENTPipeline(
  (normalizer): RevIN()
  (tokenizer): Patching()
  (patch_embedding): PatchEmbedding(
    (value_embedding): Linear(in_features=8, out_features=1024, bias=False)
    (position_embedding): PositionalEmbedding()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
  

In [11]:
root_dir = "numpy_data"
chunk_size = 2048  # time steps sent through the model per forward pass
embedding_result = []  # one embedding per .npy file (mean over its windows)
embedding_files = []  # file name for each entry of embedding_result, same order

# torch.no_grad() only turns off autograd; Dropout layers stay active unless the
# module is in eval mode. Calling eval() here is idempotent and makes the
# extracted embeddings deterministic.
model.eval()


def _embed_in_chunks(model, series, chunk_size, device="cuda"):
    """Return one embedding row per consecutive window of ``series``.

    Args:
        model: callable accepting ``x_enc`` and ``input_mask`` keyword arguments
            and returning an object with an ``embeddings`` tensor.
        series: tensor indexed as (batch, features, time steps).
        chunk_size: number of time steps per window; the last window may be
            shorter.
        device: device each window and its mask are moved to for the forward
            pass.

    Returns:
        CPU tensor with the per-window embeddings concatenated along dim 0.
    """
    n_steps = series.shape[2]
    window_embeddings = []

    for start in trange(0, n_steps, chunk_size):
        end = min(start + chunk_size, n_steps)
        chunk = series[:, :, start:end].to(device)
        # Every time step is marked as observed (mask is all True).
        chunk_mask = torch.ones(
            chunk.shape[0],
            chunk.shape[2],
            dtype=torch.bool,
            device=device,
        )

        with torch.no_grad():
            out = model(x_enc=chunk, input_mask=chunk_mask)
        window_embeddings.append(out.embeddings.cpu())

        # Drop the GPU references before the next window is uploaded.
        del chunk, chunk_mask, out
        torch.cuda.empty_cache()
        gc.collect()

    return torch.cat(window_embeddings, dim=0)


# sorted(): os.listdir returns entries in arbitrary order, which would make the
# order of embedding_result differ between machines and runs.
for file in sorted(os.listdir(root_dir)):
    if not file.endswith(".npy"):
        continue

    data = np.load(os.path.join(root_dir, file))
    print(data.shape)

    # Standardize each column independently, per file.
    # NOTE(review): StandardScaler keeps NaN inputs as NaN in its output;
    # confirm the model tolerates NaN values, since the mask marks every step
    # as observed.
    scaler = StandardScaler()
    data = scaler.fit_transform(data)

    # (time steps, features) -> (1, features, time steps)
    data = torch.tensor(data, dtype=torch.float32).T.unsqueeze(0)

    final_embeddings = _embed_in_chunks(model, data, chunk_size)

    # Collapse the per-window embeddings into a single vector for the file.
    embedding = final_embeddings.mean(dim=0)
    embedding_result.append(embedding)
    embedding_files.append(file)

(646595, 111)


  return fn(*args, **kwargs)
100%|████████████████████████████████████████████████████████████████████| 316/316 [9:00:23<00:00, 102.61s/it]


(238789, 89)


  return fn(*args, **kwargs)
100%|████████████████████████████████████████████████████████████████████| 117/117 [4:37:15<00:00, 142.18s/it]


(1079197, 105)


  return fn(*args, **kwargs)
100%|█████████████████████████████████████████████████████████████████████| 527/527 [1:44:27<00:00, 11.89s/it]


(621381, 105)


  return fn(*args, **kwargs)
100%|█████████████████████████████████████████████████████████████████████| 304/304 [3:55:14<00:00, 46.43s/it]


(1076316, 106)


  return fn(*args, **kwargs)
100%|█████████████████████████████████████████████████████████████████████| 526/526 [2:33:43<00:00, 17.54s/it]


(939419, 105)


  return fn(*args, **kwargs)
100%|████████████████████████████████████████████████████████████████████| 459/459 [12:40:52<00:00, 99.46s/it]


(635546, 105)


  return fn(*args, **kwargs)
100%|█████████████████████████████████████████████████████████████████████| 311/311 [2:04:36<00:00, 24.04s/it]


(454541, 111)


  return fn(*args, **kwargs)
100%|███████████████████████████████████████████████████████████████████| 222/222 [11:07:01<00:00, 180.28s/it]


(664797, 111)


  return fn(*args, **kwargs)
 11%|██████▉                                                        | 36/325 [16:51:30<135:20:10, 1685.85s/it]


KeyboardInterrupt: 

In [None]:
# Show the per-file embeddings collected by the loop cell.
print(str(embedding_result))

In [3]:
# Standardize every column of `data`; the result replaces `data` because the
# following cells read it under that name.
scaler = StandardScaler().fit(data)
data = scaler.transform(data)
print(data.shape)
# Inspect the first standardized row.
data[0]

  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count


(342253, 114)


  new_unnormalized_variance -= correction**2 / new_sample_count


array([            nan,             nan,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
                   nan,             nan,             nan,             nan,
                   nan, -1.73199785e+00, -2.30951043e+00, -9.12217532e-01,
        2.42223508e+00,  3.73200231e+00,  3.73200374e+00,  3.73199964e+00,
       -1.57181192e-01, -1.56019127e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -2.00538548e+00,
       -3.50988703e-04, -7.61977717e-03,  4.59955245e-04, -8.68003631e-04,
        3.20741060e-04, -5.07723607e-04,  4.16904455e-04,  8.49254713e-04,
       -3.67864006e-04,  

In [4]:
# Load the pretrained MOMENT checkpoint configured for the embedding task,
# then move it to the GPU in float32.
model = MOMENTPipeline.from_pretrained(
    "AutonLab/MOMENT-1-large",
    model_kwargs={"task_name": "embedding"},
)
model.init()
model.to("cuda").float()
# The module tree contains Dropout(p=0.1) layers. torch.no_grad() does not
# disable them, so switch to eval mode explicitly before extracting embeddings.
# eval() returns the module itself, so the cell still displays the model summary.
model.eval()



MOMENTPipeline(
  (normalizer): RevIN()
  (tokenizer): Patching()
  (patch_embedding): PatchEmbedding(
    (value_embedding): Linear(in_features=8, out_features=1024, bias=False)
    (position_embedding): PositionalEmbedding()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
  

In [13]:
# Convert to a float32 tensor only when `data` is still a NumPy array, so the
# cell leaves an already-converted tensor untouched.
data = torch.tensor(data, dtype=torch.float32) if isinstance(data, np.ndarray) else data

data.shape

torch.Size([1, 114, 342253])

In [None]:
# True wherever `data` holds a real value, False at NaN positions.
# NOTE(review): the chunk loop further down builds its own all-ones mask and
# does not read `input_mask` - confirm whether that is intended.
input_mask = torch.isnan(data).logical_not()

print(input_mask.size())
input_mask[0, :]

In [None]:
# Only reshape while `data` is still 2-D. Re-running this cell on the already
# reshaped 3-D tensor would otherwise reverse its dimensions and add another
# axis, silently producing a wrong layout.
if data.ndim == 2:
    data = data.T.unsqueeze(0) # since for MOMENT the data has to be [batch, features, sequence]

In [14]:
# for i in trange(10, desc="Processing"):
#     time.sleep(0.5)

In [16]:
chunk_size = 2048  # time steps sent through the model per forward pass
all_embeddings = []  # per-window embeddings, collected on the CPU

# torch.no_grad() only turns off autograd; Dropout layers stay active unless the
# module is in eval mode. Calling eval() here is idempotent and makes the
# extracted embeddings deterministic.
model.eval()

# `data` is indexed as (batch, features, time steps); walk the time axis in
# consecutive windows. The last window may be shorter than chunk_size.
n_steps = data.shape[2]
for start in trange(0, n_steps, chunk_size):
    end = min(start + chunk_size, n_steps)
    chunk = data[:, :, start:end].to("cuda")
    # Every time step is marked as observed (mask is all True).
    chunk_mask = torch.ones(
        chunk.shape[0],
        chunk.shape[2],
        dtype=torch.bool,
        device="cuda",
    )

    with torch.no_grad():
        out = model(x_enc=chunk, input_mask=chunk_mask)
        embeddings = out.embeddings.cpu()
        all_embeddings.append(embeddings)

    # Drop the GPU references before the next window is uploaded.
    del chunk, chunk_mask, out, embeddings
    torch.cuda.empty_cache()
    gc.collect()

# One row per window.
final_embeddings = torch.cat(all_embeddings, dim=0)

  return fn(*args, **kwargs)
100%|█████████████████████████████████████████████████████████████████████| 168/168 [1:32:42<00:00, 33.11s/it]


In [17]:
# Report the shape of the stacked per-window embeddings.
print(final_embeddings.size())

torch.Size([168, 1024])


In [18]:
# Average over dim 0 (the per-window rows) to get a single embedding vector.
embedding = torch.mean(final_embeddings, dim=0)

In [19]:
# Preview the first 15 components of the averaged embedding.
embedding[0:15]

tensor([-0.0539,  0.0247, -0.0401, -0.0558,  0.0312, -0.0448,  0.0739, -0.1524,
        -0.0823,  0.0532, -0.1173, -0.0902, -0.0508, -0.0389,  0.0307])