In [6]:
from models.influence import calc_influence_RS_single
from models import RecLoader
import utils, torch, os
import numpy as np
import scipy.sparse as sp
from torch.utils import data
import torch.nn as nn
from tqdm import tqdm
from timm.scheduler.cosine_lr import CosineLRScheduler
from sklearn.preprocessing import MinMaxScaler

def normalize(mx):
    """Symmetrically normalize a square sparse matrix: D^-1/2 * mx * D^-1/2.

    ``D`` is the diagonal matrix holding the row sums of ``mx``. Despite the
    historical "row-normalize" description, the matrix is scaled on both the
    left and the right, so ``mx`` must be square.

    Args:
        mx: scipy sparse matrix (anything exposing ``.sum(1)`` and ``.dot``).

    Returns:
        The normalized sparse matrix. Rows/columns whose row sum is zero are
        scaled by 0, i.e. they come out as all zeros.
    """
    rowsum = np.asarray(mx.sum(1)).ravel()
    # 0 ** -0.5 is inf; that is expected for empty rows and is zeroed right
    # below, so silence only the divide-by-zero warning it would trigger.
    with np.errstate(divide="ignore"):
        r_inv = np.power(rowsum, -0.5)
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(mx).dot(r_mat_inv)

# Victim model and dataset
vicm = 'CML'
dataset = 'lastfm'

rec = RecLoader()
trainarr = rec.train_matrix.toarray()

# Feature matrix derived from the dense training matrix, then min-max scaled
# into [0, 1] (presumably one row per user -- verify against
# utils.init_emb_by_feature).
user_feat, _ = utils.init_emb_by_feature(trainarr, name=dataset)
scaler = MinMaxScaler(feature_range=(0, 1))
user_feat = torch.tensor(scaler.fit_transform(user_feat), dtype=torch.float)
feat_dim = user_feat.size(1)

# Inputs for the influence computation.
train_loader = rec.train_loader
# NOTE(review): the "test" loader is the training loader as well -- confirm
# this is intended.
test_loader = rec.train_loader
model = rec.net
test_id_num = range(user_feat.size(0))
gpu = -1  # presumably selects CPU -- confirm against calc_influence_RS_single
rd = 2
r = 1
influence, harmful, helpful, _ = calc_influence_RS_single(
    model, train_loader, test_loader, test_id_num, gpu, rd, r
)

# Pair the k-th entry of `harmful` with the k-th entry of `influence`, then
# re-emit the values in index order 0..N-1 as an (N, 1) float column.
# NOTE(review): this assumes `influence` is ordered like `harmful`; if the
# helper returns influences already in sample order this pairing scrambles
# them -- verify against calc_influence_RS_single.
inf_dict = {h: i.item() for h, i in zip(harmful, influence)}
influence = torch.tensor(
    [inf_dict[k] for k in range(len(influence))],
    dtype=torch.float,
).reshape(-1, 1)

# Regression data: features -> influence score, shuffled mini-batches of 32.
tensorset = data.TensorDataset(user_feat, influence)
iter_data = data.DataLoader(dataset=tensorset, shuffle=True, batch_size=32)

def init_weights(m):
    """Re-initialize an ``nn.Linear`` layer in place; other modules are ignored.

    Intended for ``module.apply(init_weights)``. Weights are drawn from
    N(0, 0.01^2) and the bias from N(0, 1). Layers built with ``bias=False``
    are supported (the bias step is skipped instead of raising).

    Args:
        m: any ``nn.Module``; only instances whose type is exactly
            ``nn.Linear`` are touched.
    """
    if type(m) is nn.Linear:
        nn.init.normal_(m.weight, mean=0.0, std=0.01)
        if m.bias is not None:
            # NOTE(review): std 1.0 is what the original normal_(0) call used;
            # it is far wider than the weight init -- confirm it is intended.
            nn.init.normal_(m.bias, mean=0.0, std=1.0)

# Small MLP regressor: feat_dim -> 64 -> ReLU -> 1.
# Layers are passed positionally so their state_dict keys stay index-based.
net = nn.Sequential(*[
    nn.Linear(feat_dim, 64),
    nn.ReLU(),
    nn.Linear(64, 1),
])

# Train the influence regressor once and cache its weights on disk; when a
# checkpoint already exists, load it instead of retraining.
ckpt_path = f'./saved/influence/{dataset}_{vicm}.pt'
if not os.path.exists(ckpt_path):
    net.apply(init_weights)
    loss, epochs = nn.MSELoss(), 200
    optimizer = torch.optim.Adam(lr=1e-5, params=net.parameters())

    steps_per_epoch = len(iter_data)
    num_steps = epochs * steps_per_epoch  # total optimizer updates over the run
    lr_scheduler = CosineLRScheduler(
                optimizer,
                t_initial=num_steps,
                lr_min=5e-6,
                cycle_limit=1,
                t_in_epochs=False,
    )
    for epoch in range(epochs):
        losses = []  # per-batch losses of the current epoch
        for idx, (x, y) in enumerate(iter_data):
            l = loss(net(x), y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            # Global update index. It must advance by steps_per_epoch per
            # epoch; multiplying by the total step count would overshoot
            # t_initial after the first epoch and pin the LR at lr_min.
            lr_scheduler.step_update(epoch * steps_per_epoch + idx)
            losses.append(l.item())
    # Create the target directory first so a long training run is not lost
    # to a missing folder at save time.
    os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)
    torch.save(net.state_dict(), ckpt_path)
else:
    net.load_state_dict(torch.load(ckpt_path))

----- LOAD DATA in train recommender -----
load data from ./data/automotive/preprocess/train.data ...
----- LOAD DATA in train recommender -----
load data from ./data/automotive/preprocess/test.data ...
----- DATA INFO in train recommender -----
Number of users : 2928 , Number of items : 1835. 
Train size : 18425 , Test size : 2048. 
Saving checkpoint to ./saved/recommender/WMF_sgd.pt
Loaded checkpoint from ./saved/recommender/WMF_sgd.pt
[Evaluation recommender] topk=[1, 10, 20, 50, 100]
precision=[0.04, 0.02, 0.01, 0.01, 0.01], recall=[0.03, 0.14, 0.19, 0.28, 0.36], ndcg=[0.04, 0.04, 0.04, 0.04, 0.04]
[Evaluation recommender after attack][0.1 s] topk=[1, 10, 20, 50, 100]
HitUserNum=[42], TargetAvgRank=[55.5], TargetHR=[0.0, 0.002, 0.004, 0.012, 0.029], TargetNDCG=[0.0, 0.001, 0.001, 0.003, 0.006]


In [24]:
# Spot check: run the regressor on the feature row at index 98 and display
# its predicted influence score.
net(user_feat[98])

tensor([0.9791], grad_fn=<AddBackward0>)