In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch_geometric.nn import GCNConv
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Moving tensors to the GPU (falls back to the CPU when CUDA is unavailable)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Option 1: create the tensor first, then move it to the target device.
# (The tensor must exist before .to() is called on it.)
x = torch.tensor([1.0, 2.0, 3.0])
x = x.to(device)

# Option 2: create the tensor directly on the target device.
x = torch.tensor([1.0, 2.0, 3.0], device=device)

In [None]:
# A fully connected (Linear) layer applies a linear transformation to its input.
linear_layer = nn.Linear(in_features=128, out_features=64, bias=True)
x = torch.randn(32, 128)  # [batch_size, in_features]
linear_output = linear_layer(x)  # [32, 64]

# A convolutional layer (Conv2d) is used to process images and extract features.
conv_layer = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
x = torch.randn(1, 3, 32, 32)  # [batch_size, in_channels, height, width]
conv_output = conv_layer(x)  # [1, 16, 32, 32]

# Embeddings represent categorical features (integer ids) as dense vectors.
embedding_layer = nn.Embedding(num_embeddings=1000, embedding_dim=128)
tokens = torch.randint(0, 1000, (4, 10))  # [batch, sequence_length]
embedding_output = embedding_layer(tokens)  # [4, 10, 128]

# Self-attention (MultiheadAttention) is used in transformers to attend to different parts of the input.
multihead_attention = nn.MultiheadAttention(embed_dim=128, num_heads=8, dropout=0.1, batch_first=True)
x = torch.randn(5, 10, 128)  # [batch, sequence_length, embed_dim]
attn_output, _ = multihead_attention(x, x, x)  # [5, 10, 128]

# Normalization layers, used to speed up training.
layer_norm = nn.LayerNorm(normalized_shape=128)  # Normalizes over the last dimension
batch_norm = nn.BatchNorm1d(num_features=128)  # Normalizes each feature across the batch
x = torch.randn(32, 128)
layer_norm_output = layer_norm(x)  # [32, 128]
batch_norm_output = batch_norm(x)  # [32, 128]

# Dropout is a regularization method that zeroes random elements with probability p.
dropout = nn.Dropout(p=0.5)
dropout_output = dropout(x)

In [None]:
# Creating tensors: random draws from different distributions and common constants.
rand_tensor = torch.rand(3, 3)  # Uniform distribution on [0, 1)
randn_tensor = torch.randn(3, 3)  # Standard normal distribution (mean 0, std 1); values are NOT bounded to [-1, 1]
zeros_tensor = torch.zeros(3, 3)  # Filled with zeros
ones_tensor = torch.ones(3, 3)  # Filled with ones
eye_tensor = torch.eye(3)  # Identity matrix
arange_tensor = torch.arange(0, 10, 2)  # [0, 2, 4, 6, 8]
linspace_tensor = torch.linspace(0, 10, 5)  # [0, 2.5, 5, 7.5, 10]

# Tensor reductions
x = torch.tensor([[1, 2], [3, 4]])
sum_x = torch.sum(x)  # Sum of all elements
mean_x = torch.mean(x.float())  # Mean value (integer tensor is cast to float first)
max_x, indices = torch.max(x, dim=1)  # Row-wise maxima and their column indices
min_x = torch.min(x)  # Minimum over all elements

# Matrix operations
mat1 = torch.randn(2, 3)
mat2 = torch.randn(3, 4)
matmul_result = torch.matmul(mat1, mat2)  # Matrix product, shape [2, 4]

# Changing tensor shapes
x = torch.randn(4, 5)
sliced_x = x[:, :3]  # Take the first 3 columns
reshaped_x = x.reshape(2, 10)  # Reshape to [2, 10]
unsqueezed_x = x.unsqueeze(0)  # Add a leading axis -> [1, 4, 5]
squeezed_x = unsqueezed_x.squeeze()  # Drop every size-1 axis (here only the leading one) -> [4, 5]

# Reordering axes
x = torch.randn(2, 3, 4)
transposed_x = x.transpose(1, 2)  # Swaps two axes -> [2, 4, 3]
permuted_x = x.permute(2, 0, 1)  # Full reordering of the axes -> [4, 2, 3]

In [None]:
# TensorDataset bundles several tensors into one dataset that is convenient to use with DataLoader.
x_data = torch.randn(100, 10)
y_data = torch.randint(0, 2, (100,))
dataset = TensorDataset(x_data, y_data)

# Accessing a single element:
first_sample = dataset[0]  # Tuple (x, y)

# Creating a DataLoader for model training (shuffled mini-batches of 32 samples).
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
class TransformerBlock(nn.Module):
    """Transformer block: multi-head attention and a feed-forward network.

    Each sub-layer is followed by dropout, a residual connection and layer
    normalization (normalization is applied after the residual sum).

    Args:
        embed_size: Size of the feature (last) dimension of the inputs.
        heads: Number of attention heads given to ``nn.MultiheadAttention``.
        dropout: Dropout probability applied to the output of each sub-layer.
        forward_expansion: Multiplier for the hidden width of the feed-forward network.
    """

    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super().__init__()
        self.attention = nn.MultiheadAttention(embed_dim=embed_size, num_heads=heads, batch_first=True)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, forward_expansion * embed_size),
            nn.ReLU(),
            nn.Linear(forward_expansion * embed_size, embed_size)
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key=None, query=None):
        """Run the block.

        Args:
            value: Tensor of values, batch-first.
            key: Tensor of keys. Defaults to ``value`` (self-attention).
            query: Tensor of queries. Defaults to ``value`` (self-attention).

        Returns:
            Tensor with the same shape as ``query``.
        """
        # Allow the single-argument call ``block(x)`` used for self-attention.
        if key is None:
            key = value
        if query is None:
            query = value

        attention = self.attention(query, key, value)[0]
        # The dropout layer was previously constructed but never applied.
        x = self.norm1(self.dropout(attention) + query)
        forward = self.feed_forward(x)
        out = self.norm2(self.dropout(forward) + x)
        return out


class CrossInteractionAttention(nn.Module):
    """Lets a user embedding attend to a session embedding.

    The attention output is added back to the user embedding and the sum is
    layer-normalized.
    """

    def __init__(self, embed_size, heads):
        super().__init__()
        self.attention = nn.MultiheadAttention(embed_dim=embed_size, num_heads=heads, batch_first=True)
        self.norm = nn.LayerNorm(embed_size)

    def forward(self, user_embed, session_embed):
        """Return the normalized sum of ``user_embed`` and its attention over ``session_embed``."""
        # Insert an axis at position 1 so each embedding becomes a length-1 sequence.
        query = user_embed.unsqueeze(1)
        memory = session_embed.unsqueeze(1)

        attended = self.attention(query, memory, memory)[0]

        # Drop the length-1 sequence axis again before the residual sum.
        residual = attended.squeeze(1) + user_embed
        return self.norm(residual)


class RecommenderModel(nn.Module):
    """Session-based recommender that scores candidate items for a user.

    The session's item sequence is encoded with transformer blocks and pooled
    with learned attention weights, combined with session/user side features
    and a user-id embedding, and finally scored against candidate item
    embeddings with a dot product.

    Args:
        num_items: Size of the item embedding table.
        num_users: Size of the user embedding table.
        embed_size: Embedding / hidden dimension.
        num_heads: Number of attention heads.
        dropout: Dropout probability.
        forward_expansion: Feed-forward width multiplier inside each transformer block.
        num_layers: Number of transformer blocks.
        num_user_features: Number of input features for the user MLP.
        num_session_features: Number of input features for the session MLP.
        max_seq_len: Size of the position embedding table, i.e. the largest
            position index + 1 that ``forward`` can receive. Defaults to 100.
    """

    def __init__(self, num_items, num_users, embed_size, num_heads, dropout, forward_expansion, num_layers, num_user_features, num_session_features, max_seq_len=100):
        super().__init__()
        self.item_embedding = nn.Embedding(num_items, embed_size)
        self.user_embedding = nn.Embedding(num_users, embed_size)
        self.position_embedding = nn.Embedding(max_seq_len, embed_size)
        self.user_info_mlp = nn.Sequential(
            nn.Linear(num_user_features, embed_size),
            nn.ReLU(),
            nn.Linear(embed_size, embed_size)
        )
        self.session_info_mlp = nn.Sequential(
            nn.Linear(num_session_features, embed_size),
            nn.ReLU(),
            nn.Linear(embed_size, embed_size)
        )
        self.attention_pooling = nn.Linear(embed_size, 1)
        self.dropout = nn.Dropout(dropout)
        self.transformer_blocks = nn.ModuleList([
            TransformerBlock(embed_size, num_heads, dropout, forward_expansion) for _ in range(num_layers)
        ])
        self.cross_attention = CrossInteractionAttention(embed_size, num_heads)

    def forward(self, session_items, positions, user_ids, candidates, user_features, session_features):
        """Score every candidate item for every sample in the batch.

        Args:
            session_items: Item ids of the session, indexed as (batch, seq_len).
            positions: Position ids with the same shape as ``session_items``.
            user_ids: User ids, one per sample.
            candidates: Candidate item ids, indexed as (batch, num_candidates).
            user_features: Dense user features, last dimension ``num_user_features``.
            session_features: Dense session features, last dimension ``num_session_features``.

        Returns:
            Scores of shape (batch, num_candidates).
        """
        item_embed = self.item_embedding(session_items)
        pos_embed = self.position_embedding(positions)
        x = self.dropout(item_embed + pos_embed)
        for transformer in self.transformer_blocks:
            # Self-attention: the block's forward takes (value, key, query).
            x = transformer(x, x, x)

        # Attention pooling: softmax over the sequence axis, then a weighted sum.
        attn_weights = torch.softmax(self.attention_pooling(x), dim=1)
        session_representation = (attn_weights * x).sum(dim=1)

        session_embed = session_representation + self.session_info_mlp(session_features)
        user_embed = self.user_embedding(user_ids) + self.user_info_mlp(user_features)
        user_embed = self.cross_attention(user_embed, session_embed)
        combined_representation = session_embed + user_embed

        # Alternative formulation (kept for reference):
        # session_info_embed = self.session_info_mlp(session_info)
        # user_embed = self.user_embedding(user_ids)
        # user_info_embed = self.user_info_mlp(user_info)
        # user_embed = self.cross_attention(user_embed, session_representation)
        # combined_representation = session_representation + session_info_embed + user_embed + user_info_embed

        candidate_embeddings = self.item_embedding(candidates)

        # Dot product between the combined representation and each candidate embedding.
        scores = torch.matmul(combined_representation.unsqueeze(1), candidate_embeddings.transpose(1, 2)).squeeze(1)
        return scores

    def recommend(self, session_items, positions, user_ids, candidates, session_info, user_info, k=5):
        """Return the positions of the ``k`` best-scoring candidates per sample.

        Note the argument order here is (session_info, user_info), whereas
        ``forward`` takes (user_features, session_features); the features are
        therefore forwarded by keyword.

        Args:
            session_items, positions, user_ids, candidates: See ``forward``.
            session_info: Dense session features (``forward``'s ``session_features``).
            user_info: Dense user features (``forward``'s ``user_features``).
            k: Number of candidates to return; capped at the number of candidates.

        Returns:
            Long tensor of shape (batch, min(k, num_candidates)) holding indices
            along the candidate axis (not item ids). Use
            ``torch.gather(candidates, 1, indices)`` to obtain item ids.
        """
        was_training = self.training
        # Disable dropout so that recommendations are deterministic.
        self.eval()
        try:
            with torch.no_grad():
                scores = self(
                    session_items,
                    positions,
                    user_ids,
                    candidates,
                    user_features=user_info,
                    session_features=session_info,
                )
                top_k = min(k, scores.size(1))
                return torch.topk(scores, k=top_k, dim=1).indices
        finally:
            # Restore whatever mode the caller had the model in.
            self.train(was_training)

# Hyperparameters
num_items, num_users = 10000, 1000
embed_size, num_heads, dropout, forward_expansion, num_layers = 128, 8, 0.1, 4, 2
num_user_features, num_session_features = 10, 5
num_samples = 1000

# Synthetic data: every session has 10 items with positions 0..9
session_items = torch.randint(0, num_items, (num_samples, 10))
positions = torch.arange(10).repeat(num_samples, 1)
session_features = torch.randn(num_samples, num_session_features)

user_ids = torch.randint(0, num_users, (num_samples,))
user_features = torch.randn(num_samples, num_user_features)

# 5 candidate items per sample; the target is the index (0..4) of the correct candidate
candidates = torch.randint(0, num_items, (num_samples, 5))
targets = torch.randint(0, 5, (num_samples,))

# Tensor order here defines the order in which batches are unpacked in the training loop
dataset = TensorDataset(session_items, positions, user_ids, candidates, targets, user_features, session_features)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)


model = RecommenderModel(num_items, num_users, embed_size, num_heads, dropout, forward_expansion, num_layers, num_user_features, num_session_features)
optimizer = optim.Adam(model.parameters(), lr=0.001)


def train(model, train_loader, optimizer, loss_fn=None, epochs=100):
    """Train ``model`` and print the mean batch loss after every epoch.

    Args:
        model: Module called as ``model(session_items, positions, user_ids,
            candidates, user_features, session_features)`` and returning
            per-candidate scores of shape (batch, num_candidates).
        train_loader: Iterable of 7-tuples ``(session_items, positions,
            user_ids, candidates, targets, user_features, session_features)``.
        optimizer: Optimizer over the parameters of ``model``.
        loss_fn: Optional callable ``loss_fn(scores, targets)`` returning a
            scalar tensor. When ``None`` (default) the built-in softmax top-k
            loss is used. Previously this argument was required but ignored.
        epochs: Number of passes over ``train_loader``.
    """

    def softmax_topk_loss(scores, targets, k=5):
        """Cross-entropy computed over each row's top-k scores.

        Rows whose target index is not among the row's top-k candidates are
        excluded from the loss. If no row qualifies, a zero loss is returned.
        """
        k = min(k, scores.shape[1])  # torch.topk raises when k exceeds the row length
        topk_scores, topk_indices = torch.topk(scores, k, dim=1)

        # hits[i, j] is True when the j-th best candidate of row i is the target.
        hits = topk_indices == targets.unsqueeze(1)
        mask = hits.any(dim=1)
        if not mask.any():
            # Keep the placeholder on the same device as the scores.
            return torch.tensor(0.0, requires_grad=True, device=scores.device)

        # Position of the target inside the top-k list (top-k indices are unique per row).
        new_targets = hits.long().argmax(dim=1)
        return nn.CrossEntropyLoss()(topk_scores[mask], new_targets[mask])

    criterion = softmax_topk_loss if loss_fn is None else loss_fn

    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for session_items, positions, user_ids, candidates, targets, user_features, session_features in train_loader:
            optimizer.zero_grad()
            scores = model(session_items, positions, user_ids, candidates, user_features, session_features)
            loss = criterion(scores, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1
        # max(..., 1) avoids a ZeroDivisionError on an empty loader.
        print(f"Epoch {epoch+1}, Loss: {total_loss / max(num_batches, 1):.4f}")

# Start training. No loss object exists at this point in the notebook, so None is
# passed explicitly; train() computes its softmax top-k loss internally.
train(model, train_loader, optimizer, loss_fn=None, epochs=10)

In [None]:
# Sketch: lines to add inside the model's forward(), right before its `return`, to support TripletMarginLoss.
# NOTE(review): this is a fragment, not a runnable cell — `self` and `return` are only valid
# inside a method, and `negative_candidates` would have to become a forward() parameter.
if negative_candidates is not None:
    negative_embeddings = self.item_embedding(negative_candidates)
    return scores, combined_representation, candidate_embeddings, negative_embeddings

# Training step with the triplet loss (anchor = user/session representation).
# NOTE(review): this loop expects a loader that yields negative_candidates; the train_loader
# built earlier in the notebook yields a different tuple — confirm which loader is meant.
# NOTE(review): the positive/negative embeddings appear to carry an extra candidate axis
# compared with the anchor — confirm the shapes passed to TripletMarginLoss line up.
loss_fn = nn.TripletMarginLoss(margin=1.0)
for session_items, positions, user_ids, candidates, negative_candidates, session_info, user_info in train_loader:
    optimizer.zero_grad()
    scores, user_repr, positive_repr, negative_repr = model(session_items, positions, user_ids, candidates, session_info, user_info, negative_candidates)
    
    loss = loss_fn(user_repr, positive_repr, negative_repr)
    loss.backward()
    optimizer.step()

In [None]:
class BPRLoss(nn.Module):
    """Bayesian Personalized Ranking loss: ``-mean(log(sigmoid(pos - neg)))``.

    Each positive score is compared only with the negative scores of the
    same sample.
    """

    def __init__(self):
        super().__init__()

    def forward(self, pos_scores, neg_scores):
        """Compute the loss.

        Args:
            pos_scores: Scores of the positive items, shape (batch_size,) or (batch_size, 1).
            neg_scores: Scores of the negative items, shape (batch_size,) or (batch_size, num_neg).

        Returns:
            Scalar tensor with the mean loss over all (positive, negative) pairs.
        """
        # Bring both inputs to (batch, n) so that broadcasting pairs each positive
        # with its own sample's negatives. Unsqueezing a (batch, 1) tensor again
        # would broadcast to (batch, batch, num_neg) and mix samples.
        if pos_scores.dim() == 1:
            pos_scores = pos_scores.unsqueeze(1)
        if neg_scores.dim() == 1:
            neg_scores = neg_scores.unsqueeze(1)

        # logsigmoid stays finite where log(sigmoid(x)) would give log(0) = -inf.
        loss = -F.logsigmoid(pos_scores - neg_scores).mean()
        return loss

# Sketch of a forward() variant for BPR training; meant to replace the model's forward method.
# NOTE(review): the body is abbreviated — `combined_representation` is used below but is not
# computed in the lines shown here.
def forward(self, session_items, positions, user_ids, positive_candidates, session_info, user_info, negative_candidates):
    """
    Returns predictions for the positive and the negative candidates.
    """
    # ... (computation of combined_representation omitted)

    # NOTE(review): the shape comment below says 2-D, but transpose(1, 2) further down needs a
    # 3-D tensor — presumably positive_candidates should be (batch_size, 1); confirm.
    pos_embeddings = self.item_embedding(positive_candidates)  # (batch_size, embed_size)
    neg_embeddings = self.item_embedding(negative_candidates)  # (batch_size, num_neg, embed_size)

    # Dot product of the combined representation with each candidate embedding
    pos_scores = torch.matmul(combined_representation.unsqueeze(1), pos_embeddings.transpose(1, 2)).squeeze(1)
    neg_scores = torch.matmul(combined_representation.unsqueeze(1), neg_embeddings.transpose(1, 2)).squeeze(1)

    return pos_scores, neg_scores

# One optimisation step with the BPR loss.
# The loss module has to be instantiated before use; `bpr_loss` was never defined.
bpr_loss = BPRLoss()

pos_scores, neg_scores = model(session_items, positions, user_ids, positive_candidates, session_info, user_info, negative_candidates)
loss = bpr_loss(pos_scores, neg_scores)

optimizer.zero_grad()
loss.backward()
optimizer.step()

In [None]:
def ndcg_at_k(y_true, y_pred, k):
    """Compute Normalized Discounted Cumulative Gain at cutoff ``k`` (NDCG@K).

    Args:
        y_true: Array of true relevance values, one per candidate.
        y_pred: Array of predicted scores, aligned with ``y_true``.
        k: Number of top-ranked candidates to evaluate.

    Returns:
        DCG@K of the predicted ranking divided by DCG@K of the ideal ranking,
        or 0 when the ideal DCG is not positive.
    """

    def _dcg(gains):
        """Discounted cumulative gain of the first ``k`` entries of ``gains``."""
        top = gains[:k]
        discounts = np.log2(np.arange(2, len(top) + 2))
        return np.sum(top / discounts)

    # Relevance values reordered by descending predicted score ...
    ranked_by_prediction = y_true[np.argsort(y_pred)[::-1]]
    # ... and by descending true relevance (the best achievable ordering).
    ranked_ideally = y_true[np.argsort(y_true)[::-1]]

    actual_dcg = _dcg(ranked_by_prediction)
    ideal_dcg = _dcg(ranked_ideally)
    if ideal_dcg > 0:
        return actual_dcg / ideal_dcg
    return 0


def average_precision_at_k(y_true, y_pred, k):
    """Compute Average Precision at cutoff ``k`` (AP@K) for a single ranking.

    Defined at module level so that callers outside ``map_at_k`` can use it.

    Args:
        y_true: 0/1 relevance per candidate.
        y_pred: Predicted scores, aligned with ``y_true``.
        k: Cutoff; may exceed the number of candidates.

    Returns:
        Mean of precision@i over the relevant positions found in the top-k,
        or 0 when the top-k contains no relevant candidate.
    """
    order = np.argsort(y_pred)[::-1]
    relevance = np.asarray(y_true)[order]

    num_relevant = 0
    precision_sum = 0.0
    # Slicing (instead of range(k)) keeps k larger than the candidate count from raising.
    for rank, is_relevant in enumerate(relevance[:max(k, 0)], start=1):
        if is_relevant == 1:
            num_relevant += 1
            precision_sum += num_relevant / rank  # precision at this rank
    return precision_sum / num_relevant if num_relevant > 0 else 0


def map_at_k(y_true_list, y_pred_list, k):
    """Compute Mean Average Precision at cutoff ``k`` (MAP@K) over all users.

    Args:
        y_true_list: Sequence of 0/1 relevance arrays, one per user.
        y_pred_list: Sequence of predicted score arrays, aligned with ``y_true_list``.
        k: Cutoff passed to ``average_precision_at_k``.

    Returns:
        Mean AP@K over the users, or 0.0 when there are no users.
    """
    per_user_ap = [average_precision_at_k(y_true, y_pred, k) for y_true, y_pred in zip(y_true_list, y_pred_list)]
    # np.mean([]) would return NaN and emit a warning.
    return np.mean(per_user_ap) if per_user_ap else 0.0

def recall_at_k(y_true, y_pred, k):
    """Compute Recall@K: the share of all relevant candidates found in the top ``k``.

    Returns 0 when ``y_true`` contains no relevant candidate.
    """
    # Indices of the k highest-scoring candidates.
    top_k = np.argsort(y_pred)[::-1][:k]

    total_relevant = np.sum(y_true)
    if total_relevant > 0:
        return np.sum(y_true[top_k]) / total_relevant
    return 0

def evaluate_model(model, dataloader, k=10):
    """Evaluate ``model`` with NDCG@K, MAP@K and Recall@K.

    Args:
        model: Model whose forward accepts ``(session_items, positions,
            user_ids, candidates, user_features=..., session_features=...)``
            and returns per-candidate scores.
        dataloader: Iterable of batches ``(session_items, positions, user_ids,
            candidates, session_info, user_info, y_true)``, where ``y_true``
            holds 0/1 relevance per candidate.
        k: Ranking cutoff.

    Returns:
        Dict with the keys ``"NDCG@k"``, ``"MAP@k"`` and ``"Recall@k"`` mapped
        to the mean of each metric over all evaluated samples.
    """
    all_ndcg, all_map, all_recall = [], [], []

    model.eval()
    with torch.no_grad():
        for batch in dataloader:
            session_items, positions, user_ids, candidates, session_info, user_info, y_true = batch

            # The batch stores session features before user features, while the model's
            # forward takes user features first; keywords prevent a silent swap.
            scores = model(
                session_items,
                positions,
                user_ids,
                candidates,
                user_features=user_info,
                session_features=session_info,
            )
            scores = scores.cpu().numpy()
            y_true = y_true.cpu().numpy()

            for relevance, predicted in zip(y_true, scores):  # one entry per user
                all_ndcg.append(ndcg_at_k(relevance, predicted, k))
                # map_at_k over a single user equals that user's AP@K; the AP helper
                # itself is not reachable as a module-level name.
                all_map.append(map_at_k([relevance], [predicted], k))
                all_recall.append(recall_at_k(relevance, predicted, k))

    return {
        "NDCG@{}".format(k): np.mean(all_ndcg),
        "MAP@{}".format(k): np.mean(all_map),
        "Recall@{}".format(k): np.mean(all_recall)
    }

# Requires a dataloader yielding batches (session_items, positions, user_ids, candidates, session_info, user_info, y_true)
# y_true - 0/1 relevance of the candidates (1 - relevant, 0 - not relevant)
# NOTE(review): test_dataloader is not defined in the cells shown here — it must be created before this cell runs.
metrics = evaluate_model(model, test_dataloader, k=10)
print(metrics)

In [None]:
import torch.quantization

# NOTE(review): this rebinds `model` to a freshly initialised network, so the quantized copy
# below holds untrained weights — confirm that quantizing the trained model was not intended.
model = RecommenderModel(num_items, num_users, embed_size, num_heads, dropout, forward_expansion, num_layers, num_user_features, num_session_features)

# Only applicable when running on CPU.
# Replaces the nn.Linear layers of the model with dynamically quantized int8 versions.
quantized_model = torch.quantization.quantize_dynamic(
    model,
    {torch.nn.Linear},
    dtype=torch.qint8
)

In [None]:
def get_reward(chosen_item, user_id):
    """Return the reward for recommending ``chosen_item`` to ``user_id``.

    Reads the module-level ``interaction_logs`` mapping, whose entry for the
    user must provide the collections ``"clicked_items"`` and
    ``"purchased_items"``.

    Returns:
        2.0 if the user purchased the item, 1.0 if the user only clicked it,
        and -0.5 (a penalty for a bad recommendation) otherwise.
    """
    # Fetch the interaction logs of this user.
    user_interactions = interaction_logs[user_id]

    # Check purchases first: a purchased item may also have been clicked, and the
    # click check would otherwise hide the higher purchase reward.
    if chosen_item in user_interactions["purchased_items"]:
        return 2.0

    # A click without a purchase earns the smaller reward.
    if chosen_item in user_interactions["clicked_items"]:
        return 1.0

    # Otherwise penalise the bad recommendation.
    return -0.5


def get_user_context(user_id):
    """Build the context vector of a user as a float32 tensor with 3 entries.

    Reads the module-level ``user_database`` mapping. The entries are, in
    order: ``age / 100``, a 1/0 flag for ``device == "mobile"``, and
    ``time_of_day / 24``.
    """
    # Look up the stored record for this user.
    profile = user_database[user_id]

    age_scaled = profile["age"] / 100                  # age divided by 100
    is_mobile = int(profile["device"] == "mobile")     # 1 for mobile devices, else 0
    hour_scaled = profile["time_of_day"] / 24          # time of day divided by 24

    return torch.tensor([age_scaled, is_mobile, hour_scaled], dtype=torch.float32)


class ContextualBandit:
    """Linear contextual bandit that keeps one weight vector per action."""

    def __init__(self, num_actions, context_dim):
        self.num_actions, self.context_dim = num_actions, context_dim
        # One randomly initialised weight row per action.
        self.weights = torch.randn(num_actions, context_dim)

    def select_action(self, context, candidates):
        """Return the candidate whose weight row scores highest against ``context``."""
        # Score only the candidate actions: dot product of each weight row with the context.
        scores = self.weights[candidates] @ context
        best_position = scores.argmax().item()
        return candidates[best_position]

    def update(self, context, action, reward, lr=0.1):
        """Shift the weights of ``action`` along ``context`` in proportion to the prediction error."""
        predicted = self.weights[action].dot(context)
        error = reward - predicted
        self.weights[action] += lr * error * context


# NOTE(review): `transformer_recommender`, `user_id` and a ContextualBandit instance named
# `bandit` are not defined in the cells shown here — they must exist before this cell runs.

# 1. Get recommendations from the transformer
# NOTE(review): recommend() returns top-k indices along the candidate axis, one row per sample —
# confirm they are mapped to item ids / a single user's row before being given to the bandit.
top_k_candidates = transformer_recommender.recommend(session_items, positions, user_ids, candidates, session_info, user_info)

# 2. Pick the best item among the candidates with the bandit
context = get_user_context(user_id)  # E.g. demographics, device, etc.
chosen_item = bandit.select_action(context, top_k_candidates)

# 3. Show the item to the user and collect the reward (e.g. a click)
reward = get_reward(chosen_item, user_id)

# 4. Update the bandit weights
bandit.update(context, chosen_item, reward)

In [None]:
class MultiArmedBandit:
    """Epsilon-greedy multi-armed bandit with running-mean reward estimates."""

    def __init__(self, num_arms, epsilon=0.1):
        self.num_arms = num_arms
        self.epsilon = epsilon               # probability of taking a random action
        self.q_values = np.zeros(num_arms)   # mean reward observed for each arm
        self.counts = np.zeros(num_arms)     # number of times each arm was shown

    def select_arm(self):
        """Pick a random arm with probability ``epsilon``, otherwise the best-valued arm."""
        explore = np.random.rand() < self.epsilon
        if not explore:
            return np.argmax(self.q_values)  # exploit the arm with the highest estimate
        return np.random.randint(self.num_arms)  # explore a uniformly random arm

    def update(self, chosen_arm, reward):
        """Fold ``reward`` into the running mean of ``chosen_arm``."""
        self.counts[chosen_arm] += 1
        pulls = self.counts[chosen_arm]
        error = reward - self.q_values[chosen_arm]
        self.q_values[chosen_arm] += error / pulls

# === Using the bandit ===
# NOTE: this rebinds the notebook-level `num_items` (set to 10000 in the hyperparameter cell).
num_items = 10  # Suppose we have 10 items
bandit = MultiArmedBandit(num_items)

for _ in range(1000):  # Show items 1000 times
    chosen_item = bandit.select_arm()  # Pick an item
    # Random click simulation (20% probability); the reward does not depend on the chosen item,
    # so the "best" item printed below only reflects sampling noise.
    reward = np.random.choice([0, 1], p=[0.8, 0.2])
    bandit.update(chosen_item, reward)

print("Лучший товар по мнению бандита:", np.argmax(bandit.q_values))

In [None]:
class UCB_Bandit:
    """Multi-armed bandit using the Upper Confidence Bound (UCB) selection rule."""

    def __init__(self, num_arms, c=2):
        self.num_arms = num_arms
        self.c = c                           # weight of the confidence term
        self.q_values = np.zeros(num_arms)   # mean reward per arm
        self.counts = np.zeros(num_arms)     # number of times each arm was shown
        self.total_count = 0                 # total number of updates

    def select_arm(self):
        """Return the arm with the highest upper confidence bound."""
        untried = self.counts == 0
        if untried.any():
            # Every arm is tried at least once before the UCB formula is used.
            return np.argmin(self.counts)
        exploration_bonus = self.c * np.sqrt(np.log(self.total_count) / self.counts)
        return np.argmax(self.q_values + exploration_bonus)

    def update(self, chosen_arm, reward):
        """Record one observation and fold ``reward`` into the arm's running mean."""
        self.total_count += 1
        self.counts[chosen_arm] += 1
        pulls = self.counts[chosen_arm]
        error = reward - self.q_values[chosen_arm]
        self.q_values[chosen_arm] += error / pulls

# === Using the UCB bandit ===
# NOTE: this rebinds the notebook-level `num_items` and `bandit` names.
num_items = 10
bandit = UCB_Bandit(num_items)

for _ in range(1000):  
    chosen_item = bandit.select_arm()
    # 30% click probability; the reward does not depend on the chosen item.
    reward = np.random.choice([0, 1], p=[0.7, 0.3])
    bandit.update(chosen_item, reward)

print("Лучший товар по мнению UCB:", np.argmax(bandit.q_values))

In [None]:
class ThompsonSamplingBandit:
    """Multi-armed bandit for 0/1 rewards using Thompson sampling with Beta draws."""

    def __init__(self, num_arms):
        self.num_arms = num_arms
        self.successes = np.zeros(num_arms)  # number of rewards equal to 1 per arm
        self.failures = np.zeros(num_arms)   # number of all other rewards per arm

    def select_arm(self):
        """Draw one Beta(successes + 1, failures + 1) sample per arm and return the largest."""
        alpha = self.successes + 1
        beta = self.failures + 1
        draws = np.random.beta(alpha, beta)
        return np.argmax(draws)

    def update(self, chosen_arm, reward):
        """Count ``reward == 1`` as a success for the arm and anything else as a failure."""
        tally = self.successes if reward == 1 else self.failures
        tally[chosen_arm] += 1

# === Using Thompson Sampling ===
# NOTE: this rebinds the notebook-level `num_items` and `bandit` names.
num_items = 10
bandit = ThompsonSamplingBandit(num_items)

for _ in range(1000):
    chosen_item = bandit.select_arm()
    # 40% click probability; the reward does not depend on the chosen item.
    reward = np.random.choice([0, 1], p=[0.6, 0.4])
    bandit.update(chosen_item, reward)

# Empirical click rate per arm. An arm that was never shown would give 0/0 = NaN, and
# np.argmax would then report that arm as the best one, so such arms get a rate of 0.
pulls = bandit.successes + bandit.failures
click_rates = np.divide(bandit.successes, pulls, out=np.zeros_like(pulls), where=pulls > 0)
print("Лучший товар по мнению Thompson Sampling:", np.argmax(click_rates))