In [3]:
import sys
import os

In [5]:
# Project root used as the working directory. It can be overridden with the
# INTERP_DEV_ROOT environment variable; otherwise the original manually chosen
# location is used, so behaviour on the author's machine is unchanged.
project_root = os.environ.get("INTERP_DEV_ROOT", "C:/Users/Илья/Desktop/interp_dev")
# NOTE(review): presumably needed so the `formants` package imported in a later
# cell is resolved relative to the project root — confirm.
os.chdir(project_root)
print("Working dir:", os.getcwd())

Working dir: C:\Users\Илья\Desktop\interp_dev


In [7]:
import torch
import torch.nn as nn
from torchinfo import summary

from formants.transformer.formant_predictor import FormantPredictor


# Seed torch's global RNG before anything random happens, so that torch-RNG-based
# weight initialisation and the random dummy tensors created in later cells are
# reproducible on Restart & Run All.
seed = 42
torch.manual_seed(seed)

# Dummy-batch dimensions.
batch_size = 2      # number of sequences in the dummy batch
seq_len = 5         # tokens per sequence

# Model hyper-parameters (passed straight through to FormantPredictor).
hidden_dim = 512
num_formants = 3    # size of the last output dimension (see the printed output shape)
vocab_size = 71     # also the exclusive upper bound for the random token ids
pad_token_id = 0

model = FormantPredictor(
    vocab_size=vocab_size,
    hidden_dim=hidden_dim,
    num_formants=num_formants,
    pad_token_id=pad_token_id,
    max_len=256,
    dropout=0.1
)


In [9]:
# Smoke-test the forward pass on random inputs and verify the output shape.
speech_embedding_dim = 256  # width of the dummy speech embedding passed to the model

# Random token ids in [0, vocab_size); note this range includes pad_token_id (0).
token_ids = torch.randint(0, vocab_size, (batch_size, seq_len))

# One random speech-embedding vector per batch item.
speech_embedding = torch.randn(batch_size, speech_embedding_dim)

output = model(token_ids, speech_embedding)

# Fail loudly instead of relying on a visual check of the printed shape.
expected_shape = (batch_size, seq_len, num_formants)
assert output.shape == expected_shape, (
    f"unexpected output shape {tuple(output.shape)}, expected {expected_shape}"
)
print("✅ Output shape:", output.shape)


✅ Output shape: torch.Size([2, 5, 3])


In [11]:
# Per-layer report of the model, traced with the same dummy inputs as the
# forward pass above; nested sub-modules are shown down to depth 3.
summary_columns = ["input_size", "output_size", "num_params", "trainable"]
summary(model, input_data=(token_ids, speech_embedding), col_names=summary_columns, depth=3)


Layer (type:depth-idx)                        Input Shape               Output Shape              Param #                   Trainable
FormantPredictor                              [2, 5]                    [2, 5, 3]                 --                        True
├─TransformerEncoderAdaLN: 1-1                [2, 5]                    [2, 5, 512]               --                        True
│    └─Embedding: 2-1                         [2, 5]                    [2, 5, 512]               36,352                    True
│    └─LearnablePositionalEncoding: 2-2       [2, 5, 512]               [2, 5, 512]               --                        True
│    │    └─Embedding: 3-1                    [1, 5]                    [1, 5, 512]               131,072                   True
│    └─Dropout: 2-3                           [2, 5, 512]               [2, 5, 512]               --                        --
│    └─MlpAdaLN: 2-4                          [2, 256]                  [2, 512]              

In [13]:
# Dummy target formants: random values with the same shape as the model output
target_formants = torch.randn((batch_size, seq_len, num_formants))

# Optimizer over all model parameters, and the regression loss
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.MSELoss()


In [15]:
# One optimisation step on the dummy batch, to check that gradients flow.
optimizer.zero_grad()  # clear gradients left over from any earlier backward pass
model.train()          # put the module in training mode

# Forward pass
output = model(token_ids, speech_embedding)

# Loss against the dummy targets
loss = loss_fn(output, target_formants)
print("🔁 Dummy loss:", loss.item())

# Backward pass, then the parameter update
loss.backward()
optimizer.step()
print("✅ Backpropagation and update completed.")


🔁 Dummy loss: 1.825307011604309
✅ Backpropagation and update completed.
