In [1]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import numpy as np

# фиксируем сиды
torch.manual_seed(42)
np.random.seed(42)

# Генерация данных

In [2]:
n_samples = 10_000  # общее кол-во экземпляров
embedding_dim = 64  # размер эмбеддинга
noise_std = 0.05  # уровень шума

X = torch.randn(n_samples, embedding_dim) * 2
w_true = torch.randn(embedding_dim, 1) * 0.7
b_true = torch.tensor([0.7])

# таргет = sigmoid(linear + noise) -> значения в (0,1)
linear = X @ w_true + b_true
y = torch.sigmoid(linear + noise_std * torch.randn_like(linear)).squeeze(1)

# train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X.numpy(), y.numpy(), test_size=0.2, random_state=42
)
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test  = torch.tensor(X_test,  dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test  = torch.tensor(y_test,  dtype=torch.float32).unsqueeze(1)

# нормализация признаков
mean = X_train.mean(0, keepdim=True)
std  = X_train.std(0, keepdim=True) + 1e-6
X_train_norm = (X_train - mean) / std
X_test_norm  = (X_test  - mean) / std

In [3]:
print(f'X_train_norm size:\t{X_train_norm.size()}')
print(f'y_train size:\t\t{y_train.size()}\n')

print(f'X_test_norm size:\t{X_test_norm.size()}')
print(f'y_test size:\t\t{y_test.size()}')

X_train_norm size:	torch.Size([8000, 64])
y_train size:		torch.Size([8000, 1])

X_test_norm size:	torch.Size([2000, 64])
y_test size:		torch.Size([2000, 1])


In [4]:
X_train

tensor([[-0.7851, -1.6373,  0.4749,  ..., -1.5135,  0.3622, -3.1485],
        [ 0.6945, -3.7989,  2.4435,  ..., -2.0761,  1.5781, -2.2293],
        [-0.5378, -1.7930,  5.2560,  ..., -0.3883,  2.8150,  3.4921],
        ...,
        [ 1.2763,  0.2720,  2.2724,  ..., -2.5030, -0.0525, -2.6875],
        [-0.9372,  2.5964,  0.4257,  ..., -2.0897,  0.5140,  2.2735],
        [-1.2076, -3.3293, -5.6350,  ...,  2.7956, -0.4004, -1.7049]])

In [5]:
X_train_norm

tensor([[-0.3778, -0.8151,  0.2425,  ..., -0.7549,  0.1777, -1.5835],
        [ 0.3550, -1.8820,  1.2294,  ..., -1.0335,  0.7883, -1.1224],
        [-0.2553, -0.8919,  2.6395,  ..., -0.1976,  1.4095,  1.7472],
        ...,
        [ 0.6431,  0.1273,  1.1437,  ..., -1.2450, -0.0306, -1.3522],
        [-0.4532,  1.2745,  0.2178,  ..., -1.0403,  0.2539,  1.1360],
        [-0.5871, -1.6502, -2.8208,  ...,  1.3794, -0.2053, -0.8594]])

In [6]:
y_train

tensor([[7.3248e-01],
        [1.6775e-04],
        [1.0000e+00],
        ...,
        [1.0000e+00],
        [5.0067e-01],
        [9.8599e-01]])

In [7]:
torch.save(X_train, '../data/worker1/x_train.pth')
torch.save(X_train_norm, '../data/worker1/x_train_norm.pth')
torch.save(X_test, '../data/worker1/x_test.pth')
torch.save(X_test_norm, '../data/worker1/x_test_norm.pth')

torch.save(y_train, '../data/worker2/y_train.pth')
torch.save(y_test, '../data/worker2/y_test.pth')

In [17]:
!mkdir ../data/homework

In [18]:
torch.save(X_train_norm[:2000], '../data/homework/x_train_norm_worker1.pth')
torch.save(y_train[:2000], '../data/homework/y_train_worker1.pth')

torch.save(X_train_norm[2000:3200], '../data/homework/x_train_norm_worker2.pth')
torch.save(y_train[2000:3200], '../data/homework/y_train_worker2.pth')

torch.save(X_test_norm[:800], '../data/homework/x_test_norm_worker1.pth')
torch.save(y_test[:800], '../data/homework/y_test_worker1.pth')

# SGD

## Model

In [8]:
class LinearRegressor(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.linear = nn.Linear(input_dim, 1)
    def forward(self, x):
        return self.linear(x)

model = LinearRegressor(embedding_dim)
nn.init.normal_(model.linear.weight, mean=0.0, std=0.01)
nn.init.constant_(model.linear.bias, 0.0)

criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9)

## Обучение

In [9]:
t0 = time.time()

batch_size = 512
n_epochs = 30
dataset = torch.utils.data.TensorDataset(X_train_norm, y_train)
loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

for epoch in range(n_epochs):
    model.train()
    for xb, yb in loader:
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
    if (epoch+1) % 5 == 0 or epoch == 0:
        print(f"[Epoch {epoch+1:02d}] loss={loss:.6f}")
        
print(f"Elapsed time: {time.time() - t0:.4f} seconds")

[Epoch 01] loss=0.065295
[Epoch 05] loss=0.064928
[Epoch 10] loss=0.062764
[Epoch 15] loss=0.070506
[Epoch 20] loss=0.067849
[Epoch 25] loss=0.065010
[Epoch 30] loss=0.068480
Elapsed time: 0.9147 seconds


# Валидация

In [10]:
def metrics(y_true, y_pred):
    """Расчет метрик"""
    mse = torch.mean((y_true - y_pred) ** 2).item()
    return mse

In [11]:
model.eval()
with torch.no_grad():
    y_pred_sgd = model(X_test_norm)

mse_s = metrics(y_test, y_pred_sgd)
print(f"SGD (MSELoss):   MSE={mse_s:.6f}")

SGD (MSELoss):   MSE=0.068981


In [12]:
(-56694060547302 + 56694060678374) / 2 ** 16

2.0

In [13]:
model.linear.weight

Parameter containing:
tensor([[ 2.7496e-02,  1.6377e-02,  5.0892e-02, -1.0467e-01,  7.4125e-03,
         -7.5967e-02, -9.7439e-02, -7.9946e-03,  2.9753e-02,  7.5591e-02,
          6.8202e-02,  3.6376e-02,  9.5321e-03,  5.9654e-02, -1.1512e-01,
         -1.9416e-02,  8.0640e-02, -4.5102e-02,  3.0652e-02,  2.5311e-02,
          6.2073e-02, -1.8994e-02,  2.7111e-02,  3.6852e-02, -6.8253e-02,
          6.3581e-02,  1.6095e-02, -3.2301e-02, -3.1563e-02, -4.7524e-02,
         -7.9002e-02,  4.1198e-02,  2.1871e-02,  8.3317e-02, -1.8258e-02,
          1.2448e-02,  5.5225e-02,  9.3417e-02, -5.1130e-02, -9.4515e-05,
          3.6994e-02, -6.2104e-02, -7.0079e-02,  1.3150e-02, -3.6580e-03,
          7.7845e-03, -4.5025e-02, -4.9922e-02,  3.2048e-02, -4.1664e-02,
          1.9410e-02, -1.5300e-02,  3.2155e-02, -5.2456e-03,  3.0242e-02,
         -6.1409e-03, -6.6406e-02, -6.3751e-02,  4.8579e-02, -2.6156e-02,
         -2.6324e-04,  5.2959e-02, -9.5709e-02,  5.5153e-02]],
       requires_grad=True)

In [14]:
7.4136 / 0.8963

8.27133772174495

In [15]:
(7792451278391206618 + -7792451278391172624) / 2 ** 16

0.518707275390625

In [16]:
model.linear.bias

Parameter containing:
tensor([0.5253], requires_grad=True)