In [19]:
import sys
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
sys.path.append("..")

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification

from data_processing.data_generate import generate_random_matrix

FFM公式如下:

$$
\varnothing FfM(w,x) = \hat{y}(x) = w_0 + \sum_{i=1}^{n} w_i x_i + \sum_{j_1 = 1}^{n} \sum_{j2 = j1+1}^{n} (w_{j1,f2} \cdot w_{j_2,f1})x_{j_1}x_{j_2}
$$

In [85]:
class FFM(nn.Module):
    def __init__(self, num_features, num_fields, embedding_dim):
        """
        FFM 模型初始化
        :param num_features: 特征总数（特征编码后的维度）
        :param num_fields: 域（Field）的总数
        :param embedding_dim: 隐向量维度
        """
        super(FFM, self).__init__()
        self.num_features = num_features
        self.num_fields = num_fields
        self.embedding_dim = embedding_dim

        # 初始化参数
        self.w0 =nn.Parameter(torch.zeros(1))
        self.w = nn.Parameter(torch.randn(num_features))

        # Field-aware 隐向量部分
        # 每个特征对每个其他域维护一个隐向量
        # 形状: (num_features, num_fields, embedding_dim)
        self.embeddings = nn.Parameter(
            torch.randn(num_features,num_fields,embedding_dim)
        )

    def forward(self,X,filed_map):
        """
        :param x: 输入张量，形状 (batch_size, num_features)，稀疏 one-hot 编码
        :param field_map: 每个特征所属的域编号，形状 (num_features,)
        """
        # -------------------- 线性部分计算 --------------------
        # 线性项: w0 + sum(wi * xi)
        linear_terms = self.w0 + torch.sum(self.w * X, dim = 1) # (batch_size,)

        # -------------------- 交叉部分计算 --------------------
        batch_size = X.shape[0]
        cross_terms = torch.zeros(batch_size,device = X.device)# 存储交叉项的结果

        for i in range(self.num_features):
            for j in range(i+1,self.num_features):
                # 只计算非零元素的交叉项

                xi = X[:,i] # (batch_size,)
                xj = X[:,j] # (batch_size,)
                non_zero = (xi != 0) & (xj != 0) #仅同时处理非0特征对 逻辑与（&）操作会逐元素执行，返回一个布尔数组，表示 xi 和 xj 在相应位置上同时非零的位置。


                if non_zero.any(): #至少存一个非0特征对
                    fi = filed_map[i]
                    fj = filed_map[j]

                    #提取对应的隐向量
                    vi = self.embeddings[i,fi,:]
                    vj = self.embeddings[j,fj,:]

                    #计算点积并加权
                    interaction = torch.sum(vi * vj)# 标量
                    cross = interaction * xi[non_zero] * xj[non_zero]
                    cross_terms[non_zero] += cross
        # -------------------- 输出 --------------------
        output = linear_terms + cross_terms
        return torch.sigmoid(output) # 适用于二分类（如CTR预测)
    
    def predict(self,X,filed_map):
        """
        预测函数
        :param x: 输入张量，形状 (batch_size, num_features)，稀疏 one-hot 编码
        :param field_map: 每个特征所属的域编号，形状 (num_features,)
        """
        # 计算模型输出
        output = self.forward(X,filed_map)
        
        return output
    
        

In [86]:
 # -------------------- 示例数据 --------------------
    # 假设有3个域：
    # Field 0: 用户特征（性别、年龄）
    # Field 1: 广告特征（类别、价格）
    # Field 2: 上下文特征（时段、位置）

num_features = 6  # 特征总数
num_fields = 3    # 域总数
embedding_dim = 4 # 隐向量维度
epoch = 100

#随机生成数据

X,y = make_classification(
    n_samples = 100,
    n_features = num_features,
    n_informative = 5,
    n_redundant = 0,
    n_classes = 2,
    random_state = 42
)

X_tensor = torch.tensor(X, dtype = torch.float32)
y_tensor = torch.tensor(y, dtype = torch.float32)

# 定义每个特征所属的域编号
field_map = torch.tensor([0, 0, 0, 1, 1, 2])  # 6个特征分别属于域0,0,0,1,1,2
    

In [87]:
# 模型初始化
model = FFM(num_features,num_fields, 5)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr = 0.01)

In [88]:
#训练过程
pbar = tqdm(range(epoch), desc = "Training FM", ncols = 100, unit = "iter")
for i in tqdm(pbar):
    # 前向传播
    y_pred = model(X_tensor, field_map)
    # 计算损失
    loss = criterion(y_pred, y_tensor)

    # 反向传播
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    pbar.set_postfix({'loss': loss.item()})

Training FM: 100%|██████████████████████████████████| 100/100 [00:00<00:00, 235.71iter/s, loss=1.43]
100%|██████████| 100/100 [00:00<00:00, 236.39it/s]


In [95]:
#预测结果对比
res = pd.DataFrame(X)
res['y'] = y
res['y_pred'] = model.predict(X_tensor,filed_map = field_map).detach().numpy()
res['y_pred'] = round(res['y_pred'],4)
res

Unnamed: 0,0,1,2,3,4,5,y,y_pred
0,-0.768120,-1.935827,1.129026,2.478217,-0.118520,-0.361891,0,0.0000
1,-0.306700,0.379947,2.513930,-3.518210,-1.047075,0.252756,1,1.0000
2,0.628753,2.049215,0.819933,1.141112,1.427840,0.835565,0,0.9979
3,-2.264216,-0.521737,-1.008664,0.364217,0.192779,0.000699,0,0.0408
4,4.487371,-4.772025,-2.745894,-1.014853,-1.712937,-0.202846,0,1.0000
...,...,...,...,...,...,...,...,...
95,0.527332,-2.416953,2.016924,1.084541,1.767058,2.643572,0,0.0000
96,0.453731,1.986156,0.025697,2.499804,0.381789,-2.320737,1,0.9998
97,1.301760,-0.197759,-0.655325,0.579585,0.994079,-1.017192,1,0.9998
98,0.583668,-0.805124,0.989193,-1.269675,-0.171788,-1.753107,1,0.9999
