In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import cv2
import warnings
import copy
import random
from keras import layers, models
from keras.utils import to_categorical
from keras.datasets import mnist
from keras.optimizers import Adam

In [None]:
def anneal(alpha, mask_a, d=2, lower_bound=20, upper_bound=10):
    """Return a randomly perturbed copy of an interaction vector.

    A fresh random boolean mask is XOR-ed with ``mask_a``.  Inside a randomly
    chosen window ``[start_w, end_w)`` along the first axis, every entry
    selected by that combined mask is increased by a random step drawn from
    ``{0, 1, ..., d} / 225``.  The caller's array is never modified.

    Parameters
    ----------
    alpha : np.ndarray
        Values to perturb.
    mask_a : np.ndarray of bool
        Boolean mask with the same shape as ``alpha``.
    d : int, optional
        Largest integer step (inclusive) before scaling by ``1 / 225``.
    lower_bound : int, optional
        Exclusive upper limit for the random window start.
    upper_bound : int, optional
        The window end is drawn from
        ``[len(alpha) - upper_bound, len(alpha))``.

    Returns
    -------
    np.ndarray
        Perturbed copy of ``alpha``; non-float inputs are promoted to float.
    """
    alpha = alpha.copy()
    # Fractional steps cannot be added in place to an integer/bool array
    # (NumPy refuses the float -> int cast), so promote such inputs first.
    if not np.issubdtype(alpha.dtype, np.inexact):
        alpha = alpha.astype(float)

    # The order of the random draws is kept as in the original implementation
    # so seeded runs stay reproducible.
    mask_b = np.random.choice([True, False], size=alpha.shape)
    mask = mask_a ^ mask_b
    # NOTE(review): the divisor is 225; confirm it was not meant to be 255.
    step = np.random.randint(0, d + 1, size=alpha.shape) / 225.
    start_w = np.random.randint(0, lower_bound)
    end_w = np.random.randint(len(alpha) - upper_bound, len(alpha))

    # Only entries inside the window keep their mask bit.
    masksliced = np.zeros(alpha.shape, dtype=bool)
    masksliced[start_w:end_w] = mask[start_w:end_w]
    alpha[masksliced] += step[masksliced]
    return alpha

In [None]:
def rmse_score(model, user_ind,user_embeddings, item_embeddings, alpha):
    """RMSE between one user's predicted ratings and an interaction vector.

    The prediction is the product of ``user_embeddings[user_ind]`` with the
    transposed ``item_embeddings``; the target is ``alpha`` flattened and
    converted to a float32 tensor.  ``model`` is only switched to evaluation
    mode here.
    """
    model.eval()  # evaluation mode, as in the rest of the notebook
    with torch.no_grad():
        flat_alpha = alpha.reshape(-1)
        user_vector = user_embeddings[user_ind]
        predicted = torch.matmul(user_vector, item_embeddings.T)
        target = torch.tensor(flat_alpha, dtype=torch.float32)
        mse = mean_squared_error(target, predicted)
        return np.sqrt(mse)

In [None]:
def fitness(user_embeddings, item_embeddings, alpha_population, model, lambda_value = 0.1):
    """Compute one payoff value per member of ``alpha_population``.

    ``alpha_population`` is an iterable of ``(user_index, interaction_row)``
    pairs, i.e. (possibly manipulated) interaction rows for some users.  Each
    payoff is ``|1 + lambda_value * rmse - ||interaction_row|||`` where
    ``rmse`` comes from ``rmse_score`` for that user.

    Returns a list of payoffs in population order.
    """
    def _payoff(user_index, interactions):
        # Weighted prediction error for this single user's row.
        weighted_error = lambda_value * rmse_score(
            model, user_index, user_embeddings, item_embeddings, interactions
        )
        raw_value = 1 + weighted_error - np.linalg.norm(interactions)
        return abs(np.max(raw_value))

    return [
        _payoff(user_index, interactions)
        for user_index, interactions in alpha_population
    ]

In [None]:
def twoplayergame_sa(user_embeddings, item_embeddings, interaction_matrix, model):
    """Search for a high-payoff manipulated interaction row with simulated annealing.

    The rows of ``interaction_matrix`` are paired with their row index,
    shuffled, and the first two thirds become the current population ``ac``
    and the best-so-far population ``ag``.  Candidates ``an`` are produced by
    applying ``anneal`` to every row of ``ac`` and are accepted when their best
    ``fitness`` improves, or otherwise with probability
    ``exp((max(evaln) - max(evalc)) / Tcurr)``.

    Returns
    -------
    tuple
        The ``(row_index, interaction_row)`` member of ``ag`` with the highest
        ``fitness`` value.

    Notes
    -----
    With ``Tmax=10``, ``Tmin=5`` and ``p=0.2`` the temperature loop body runs
    for a single temperature only (10 -> 2), and ``Tcurr`` is never reset.
    NOTE(review): the outer loop keeps going while the payoff changes by less
    than 0.1 and stops as soon as it changes by more; confirm this is the
    intended convergence test.
    """
    exitloop = False
    Tmax = 10   # starting temperature
    Tmin = 5    # annealing stops once the temperature falls below this
    v = 50      # candidate generations per temperature
    p = 0.2     # multiplicative cooling factor
    mask = np.random.choice([True,False], size=interaction_matrix[0].shape)
    Tcurr = Tmax
    population = [(i,interaction_matrix[i]) for i in range(interaction_matrix.shape[0])]
    random.shuffle(population)
    pop_size = len(population) // 3
    ac = population[:pop_size].copy()
    ag = population[pop_size:2*pop_size].copy()
    evalc = fitness(user_embeddings, item_embeddings, ac, model)
    maxpayoff = max(fitness(user_embeddings, item_embeddings, ag, model))
    while not exitloop:
        evalg = fitness(user_embeddings, item_embeddings, ag,model)
        curr_index = np.argmax(evalg)
        currpayoff = evalg[curr_index]
        print("The current Payoff is:",currpayoff)
        if abs(currpayoff - maxpayoff) < 0.1:
            maxpayoff = currpayoff
            if Tcurr < Tmin:
                # The cooling schedule is exhausted, so neither ac nor ag can
                # change any more.  Without this exit the loop would evaluate
                # the same population forever whenever the payoff moved by
                # less than 0.1 after annealing.
                exitloop = True
                continue
            while Tcurr >= Tmin:
                i = 1
                while i <= v:
                    # Perturb every member of the current population.
                    temp = []
                    for ind, interaction in ac:
                        temp.append((ind,anneal(interaction,mask)))
                    an = temp.copy()
                    evaln = fitness(user_embeddings, item_embeddings, an,model)
                    if max(evaln) > max(evalc):
                        # Strict improvement: always accept.
                        ac = an.copy()
                        evalc = evaln.copy()
                        if max(evalg) < max(evaln):
                            ag = an.copy()
                            evalg = evaln.copy()
                    else:
                        # Worse or equal candidate: accept with the
                        # temperature-dependent probability.
                        if np.random.random() <= np.exp((max(evaln) - max(evalc)) / Tcurr):
                            ac = an.copy()
                            evalc = evaln.copy()
                    i += 1
                Tcurr *= p
            ag = ac.copy()
        else:
            exitloop = True
    return ag[np.argmax(fitness(user_embeddings, item_embeddings, ag, model))]

In [None]:
def generate_manipulated_data(matrix, A_s):
    """Append manipulated interaction rows to the interaction matrix.

    Parameters
    ----------
    matrix : np.ndarray
        Interaction matrix the new rows are appended to (along axis 0).
    A_s : sequence
        Manipulated data for some users; only element ``[1]`` of each entry
        (the interaction row) is used.

    Returns
    -------
    np.ndarray
        New array holding ``matrix`` followed by the manipulated rows.  When
        ``A_s`` is empty, an unchanged copy of ``matrix`` is returned.
    """
    alphas = [entry[1] for entry in A_s]
    print(alphas)
    if not alphas:
        # np.concatenate cannot join a 2-D matrix with an empty 1-D list.
        return np.array(matrix, copy=True)
    return np.concatenate([matrix, alphas], axis=0)

In [None]:
def adversarial_manipulation(user_embeddings, item_embeddings, interaction_matrix, model,M):
    """Run ``twoplayergame_sa`` ``M`` times and append the results to the matrix.

    Every run returns one ``(row_index, interaction_row)`` pair; the collected
    pairs are handed to ``generate_manipulated_data`` together with
    ``interaction_matrix``, and its result is returned.
    """
    crafted_rows = [
        twoplayergame_sa(user_embeddings, item_embeddings, interaction_matrix, model)
        for _ in range(M)
    ]
    return generate_manipulated_data(interaction_matrix, crafted_rows)

### Models

1. Normal - train the GCN on the original interaction matrix and generate recommendations for an existing or new user
2. Manipulated - use the normally trained model to generate recommendations from the adversarially manipulated interaction data
3. Secure - train the GCN on the adversarially manipulated interaction matrix and generate recommendations for an existing or new user

The metric can be the RMSE or the top-k predictions produced in each case.

In [None]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m61.4/63.1 kB[0m [31m36.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [None]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# Base path for saving trained models and other outputs (placeholder value)
save_base_path = "/path/to/save"

# Run on the GPU when CUDA is available; otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
import zipfile
import requests
from io import BytesIO

In [None]:
url = 'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip'

# Download the dataset. The timeout stops a stalled connection from hanging
# the notebook, and raise_for_status() surfaces HTTP errors here instead of
# as a confusing "not a zip file" failure below.
response = requests.get(url, timeout=60)
response.raise_for_status()
zip_file = zipfile.ZipFile(BytesIO(response.content))

# Extract the ratings and movies CSV files, closing each archive member
# as soon as it has been read
with zip_file.open('ml-latest-small/ratings.csv') as ratings_file:
    ratings = pd.read_csv(ratings_file)
with zip_file.open('ml-latest-small/movies.csv') as movies_file:
    movies = pd.read_csv(movies_file)

# Preview the datasets
print(ratings.head())
print(movies.head())

   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  


In [None]:
# Distinct user and movie ids found in the ratings table
users = ratings['userId'].unique()
items = ratings['movieId'].unique()

# Give every raw id a contiguous 0-based index (used for embedding lookups)
user_to_idx = dict(zip(users, range(len(users))))
item_to_idx = dict(zip(items, range(len(items))))

# Add the index columns next to the raw ids
ratings['user_idx'] = ratings['userId'].map(user_to_idx)
ratings['item_idx'] = ratings['movieId'].map(item_to_idx)

In [None]:
print(users.size)
print(items.size)

610
9724


In [None]:
print(ratings)

        userId  movieId  rating   timestamp  user_idx  item_idx
0            1        1     4.0   964982703         0         0
1            1        3     4.0   964981247         0         1
2            1        6     4.0   964982224         0         2
3            1       47     5.0   964983815         0         3
4            1       50     5.0   964982931         0         4
...        ...      ...     ...         ...       ...       ...
100831     610   166534     4.0  1493848402       609      3120
100832     610   168248     5.0  1493850091       609      2035
100833     610   168250     5.0  1494273047       609      3121
100834     610   168252     5.0  1493846352       609      1392
100835     610   170875     3.0  1493846415       609      2873

[100836 rows x 6 columns]


In [None]:
# Create a pivot table where rows are users, columns are items, and values are ratings
interaction_matrix = ratings.pivot(index='user_idx', columns='item_idx', values='rating').fillna(0)

In [None]:
interaction_array= np.array(interaction_matrix)
print(interaction_array)
print('matrix dimensions : ', interaction_array.shape)

[[4.  4.  4.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 ...
 [2.5 2.  0.  ... 0.  0.  0. ]
 [3.  0.  0.  ... 0.  0.  0. ]
 [5.  0.  5.  ... 3.  3.5 3.5]]
matrix dimensions :  (610, 9724)


In [None]:
class UserItemDataset(Dataset):
    """Dataset view over a ratings table with user_idx, item_idx and rating columns.

    Each item is a dict of scalar tensors: ``user_idx`` and ``item_idx`` as
    ``torch.long`` and ``rating`` as ``torch.float``.
    """

    # (column name, tensor dtype) for every field returned by __getitem__
    _FIELD_DTYPES = (
        ('user_idx', torch.long),
        ('item_idx', torch.long),
        ('rating', torch.float),
    )

    def __init__(self, ratings):
        self.ratings = ratings

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        # Positional row lookup, then one tensor per field.
        record = self.ratings.iloc[idx]
        return {
            field: torch.tensor(record[field], dtype=dtype)
            for field, dtype in self._FIELD_DTYPES
        }

# Every rating goes into the training split; the validation split is empty
# and no test split is created in this cell.
train_size, val_size = len(ratings), 0

# random_split is used only for its shuffled list of row positions
train_dataset, val_dataset = torch.utils.data.random_split(
    ratings, [train_size, val_size]
)

# Mini-batches of 32 shuffled ratings for training
train_loader = DataLoader(
    UserItemDataset(ratings.iloc[train_dataset.indices]),
    shuffle=True,
    batch_size=32,
)

In [None]:
class MFModel(nn.Module):
    """Matrix-factorisation model: a rating is the dot product of a user
    embedding and an item embedding."""

    def __init__(self, num_users, num_items, embedding_size):
        super().__init__()
        # One learnable vector per user and per item (user table first)
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.item_embedding = nn.Embedding(num_items, embedding_size)

    def forward(self, user_ids, item_ids):
        """Return one predicted rating per (user_id, item_id) pair."""
        user_vectors = self.user_embedding(user_ids)
        item_vectors = self.item_embedding(item_ids)
        # Row-wise dot product
        return torch.sum(user_vectors * item_vectors, dim=1)

# Initialize the model with number of users, items, and the embedding size
num_users = len(users)
num_items = len(items)
embedding_size = 50  # This is a tunable hyperparameter


In [None]:
mf_model1 = MFModel(num_users, num_items, embedding_size).to(device)
optimizer = optim.Adam(mf_model1.parameters(), lr=0.001)  # Adam optimizer
loss_fn = nn.MSELoss()  # Loss function (Mean Squared Error)

In [None]:
print(num_users, num_items, mf_model1)

610 9724 MFModel(
  (user_embedding): Embedding(610, 50)
  (item_embedding): Embedding(9724, 50)
)


In [None]:
def train_mf_model(model, train_loader, optimizer, criterion, num_epochs=10):
    """Train ``model`` on mini-batches and print the mean batch loss per epoch.

    Parameters
    ----------
    model : nn.Module
        Called as ``model(user_ids, item_ids)``.
    train_loader : iterable with ``len()``
        Yields dicts with ``'user_idx'``, ``'item_idx'`` and ``'rating'``
        tensors.
    optimizer : torch.optim.Optimizer
        Optimizer over ``model``'s parameters.
    criterion : callable
        Loss called as ``criterion(predictions, targets)``.
    num_epochs : int, optional
        Number of passes over ``train_loader``.
    """
    model.train()  # Set the model to training mode

    # Follow the model's own device instead of a notebook-level ``device``
    # global, so batches always land where the weights live.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    num_batches = len(train_loader)
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in train_loader:
            user_ids = batch['user_idx'].to(target_device)
            item_ids = batch['item_idx'].to(target_device)
            targets = batch['rating'].to(target_device)

            optimizer.zero_grad()  # Zero the gradients
            preds = model(user_ids, item_ids)  # Forward pass
            loss = criterion(preds, targets)  # Compute loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Gradient descent step
            total_loss += loss.item()  # Accumulate loss

        # An empty loader reports NaN instead of dividing by zero.
        mean_loss = total_loss / num_batches if num_batches else float('nan')
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {mean_loss:.4f}')


In [None]:
train_mf_model(mf_model1, train_loader, optimizer, loss_fn)

Epoch 1/10, Loss: 54.6969
Epoch 2/10, Loss: 37.4366
Epoch 3/10, Loss: 26.6908
Epoch 4/10, Loss: 19.7055
Epoch 5/10, Loss: 14.5891
Epoch 6/10, Loss: 10.2549
Epoch 7/10, Loss: 6.6135
Epoch 8/10, Loss: 4.1228
Epoch 9/10, Loss: 2.6567
Epoch 10/10, Loss: 1.8019


In [None]:
with torch.no_grad():
    user_embeddings = mf_model1.user_embedding.weight.cpu().numpy()
    item_embeddings = mf_model1.item_embedding.weight.cpu().numpy()

In [None]:
print(user_embeddings.shape)
print(user_embeddings)

(610, 50)
[[ 1.5477024e-04  4.4074067e-01  4.0332235e-02 ... -5.9118766e-01
   1.7145234e-01 -9.1096663e-01]
 [ 2.7443569e-02 -6.9788116e-01  1.9023906e-01 ...  3.6470050e-01
  -1.2387406e+00 -4.6131521e-01]
 [ 2.5046322e-01  2.5560412e-01 -1.5024363e-01 ... -1.9373876e+00
  -4.2474613e-01 -1.0763760e+00]
 ...
 [-1.6998166e-01  5.2733362e-01  2.1600449e-01 ... -3.1336018e-01
   2.3594224e-01  9.0743750e-02]
 [ 9.0982920e-01  1.0660665e-01  1.7177416e+00 ... -5.4283381e-01
   8.1047809e-01 -3.6990464e-02]
 [ 1.8971203e-02  6.7724478e-01  7.9474516e-02 ... -3.3001146e-01
  -1.9065486e-01  3.2382765e-01]]


In [None]:
def create_graph_data(ratings,num_users,user_embeddings,item_embeddings):
    """Build the PyTorch Geometric graph of user-item interactions.

    Nodes are the users followed by the items, so item ``j`` becomes node
    ``num_users + j``.  Node features are the user embeddings stacked on top
    of the item embeddings.  Every row of ``ratings`` contributes one edge
    from its ``user_idx`` node to its ``item_idx`` node.

    Parameters
    ----------
    ratings : pd.DataFrame
        Must provide ``user_idx`` and ``item_idx`` columns.
    num_users : int
        Offset added to item indices to obtain item node ids.
    user_embeddings, item_embeddings : array-like
        Per-user and per-item feature rows, converted to float tensors.

    Returns
    -------
    torch_geometric.data.Data
        Graph with ``x`` (node features) and ``edge_index``.
    """
    # Take an explicit copy before shifting the item ids in place: the array
    # behind ``.values`` can be a read-only view under pandas Copy-on-Write.
    user_item_edges = ratings[['user_idx', 'item_idx']].to_numpy(copy=True).T

    user_item_edges[1] += num_users

    # Create edge index (format required by torch_geometric)
    # NOTE(review): edges run user -> item only; confirm reverse edges are
    # not needed for message passing.
    edge_index = torch.tensor(user_item_edges, dtype=torch.long)

    # Concatenate user and item embeddings to form node features
    node_features = torch.cat([torch.tensor(user_embeddings, dtype=torch.float), torch.tensor(item_embeddings, dtype=torch.float)], dim=0)

    print(node_features.shape)
    print(user_item_edges.shape)
    print(user_item_edges)

    # Create the PyTorch Geometric data object (x: node features, edge_index: graph edges)
    train_graph_data = Data(x=node_features, edge_index=edge_index)
    return train_graph_data


In [None]:
train_graph_data = create_graph_data(ratings,num_users,user_embeddings,item_embeddings)

torch.Size([10334, 50])
(2, 100836)
[[   0    0    0 ...  609  609  609]
 [ 610  611  612 ... 3731 2002 3483]]


In [None]:
class GCNModel(nn.Module):
    """Two stacked ``GCNConv`` layers with a ReLU in between."""

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # in_channels -> hidden_channels -> out_channels
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Return the second layer's output for the nodes of ``data``.

        ``data`` must expose ``x`` (node features) and ``edge_index``.
        """
        edge_index = data.edge_index
        hidden = torch.relu(self.conv1(data.x, edge_index))
        return self.conv2(hidden, edge_index)

In [None]:
# Initialize the GCN model
model1 = GCNModel(in_channels=embedding_size, hidden_channels=64, out_channels=32).to(device)

In [None]:
gcn_optimizer = optim.Adam(model1.parameters(), lr=0.01)
gcn_loss_fn = nn.MSELoss()

In [None]:
def train_gcn_model(model, train_graph, optimizer, criterion, interaction_matrix,num_epochs=30):
    """Train the GCN to reconstruct the full user-item interaction matrix.

    Each epoch runs ``model(train_graph)``, takes the user and item rows of
    the output, predicts all ratings as their matrix product, and minimises
    ``criterion`` against ``interaction_matrix``.

    Parameters
    ----------
    model : nn.Module
        Called as ``model(train_graph)``; returns one row per graph node.
    train_graph : object with ``.to(device)``
        Graph passed to the model.  Assumes user nodes come first, followed
        by item nodes, as built by ``create_graph_data``.
    optimizer : torch.optim.Optimizer
        Optimizer over ``model``'s parameters.
    criterion : callable
        Loss called as ``criterion(predicted, target)``.
    interaction_matrix : pd.DataFrame
        Users x items table whose index and column labels are the user and
        item indices.
    num_epochs : int, optional
        Number of full-graph training steps.

    Returns
    -------
    tuple of torch.Tensor
        ``(user_embeddings, item_embeddings)`` from the last forward pass,
        i.e. computed before the final optimizer step and still attached to
        the autograd graph.
    """
    model.train()  # Set model to training mode

    # Keep graph and target on the model's device to avoid device mismatches.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    train_graph = train_graph.to(device)

    # Take the node indices from the matrix itself rather than from notebook
    # globals, so prediction rows/columns always line up with the target.
    user_indices = torch.as_tensor(np.asarray(interaction_matrix.index), dtype=torch.long, device=device)
    item_indices = torch.as_tensor(np.asarray(interaction_matrix.columns), dtype=torch.long, device=device)
    # Item nodes follow the user nodes; assumes user indices are 0..n_users-1.
    item_offset = len(user_indices)

    # Loop-invariant target, built once.
    target = torch.tensor(interaction_matrix.values, dtype=torch.float32, device=device)

    user_embed=[]
    item_embed=[]
    for epoch in range(num_epochs):
        optimizer.zero_grad()  # Zero the gradients
        output = model(train_graph)  # Forward pass through the GCN

        user_embeddings = output[user_indices]
        item_embeddings = output[item_indices + item_offset]  # Shift past the user nodes

        # Compute predicted ratings for every user-item pair
        predicted_ratings = torch.matmul(user_embeddings, item_embeddings.T)

        loss = criterion(predicted_ratings, target)
        loss.backward()  # Backpropagation
        optimizer.step()  # Gradient descent step

        user_embed= user_embeddings
        item_embed= item_embeddings

        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}')

    return user_embed, item_embed

In [None]:
user_embeddings, item_embeddings= train_gcn_model(model1, train_graph_data, gcn_optimizer, gcn_loss_fn, interaction_matrix)

Epoch 1/30, Loss: 157.2139
Epoch 2/30, Loss: 11.4264
Epoch 3/30, Loss: 5.8486
Epoch 4/30, Loss: 9.8361
Epoch 5/30, Loss: 14.0040
Epoch 6/30, Loss: 14.2494
Epoch 7/30, Loss: 11.7604
Epoch 8/30, Loss: 8.7397
Epoch 9/30, Loss: 6.2388
Epoch 10/30, Loss: 4.4234
Epoch 11/30, Loss: 3.1859
Epoch 12/30, Loss: 2.3583
Epoch 13/30, Loss: 1.8039
Epoch 14/30, Loss: 1.4260
Epoch 15/30, Loss: 1.1638
Epoch 16/30, Loss: 0.9766
Epoch 17/30, Loss: 0.8380
Epoch 18/30, Loss: 0.7333
Epoch 19/30, Loss: 0.6533
Epoch 20/30, Loss: 0.5915
Epoch 21/30, Loss: 0.5427
Epoch 22/30, Loss: 0.5040
Epoch 23/30, Loss: 0.4729
Epoch 24/30, Loss: 0.4475
Epoch 25/30, Loss: 0.4266
Epoch 26/30, Loss: 0.4092
Epoch 27/30, Loss: 0.3945
Epoch 28/30, Loss: 0.3821
Epoch 29/30, Loss: 0.3714
Epoch 30/30, Loss: 0.3622


In [None]:
print(user_embeddings)
print(user_embeddings.shape)

tensor([[ 0.1857, -0.0128,  0.1200,  ..., -0.1174, -0.0030, -0.0708],
        [-0.1522,  0.4284,  0.2417,  ...,  0.2828, -0.1968,  0.0984],
        [ 0.0252, -0.0926,  0.3362,  ...,  0.0628, -0.1684, -0.0068],
        ...,
        [ 0.0263, -0.0391,  0.1251,  ..., -0.0085, -0.0080, -0.0169],
        [ 0.2150,  0.0108,  0.2129,  ..., -0.2333,  0.2984,  0.0202],
        [ 0.0494,  0.0105,  0.1090,  ...,  0.0606, -0.0413,  0.0364]],
       grad_fn=<IndexBackward0>)
torch.Size([610, 32])


In [None]:
print(item_embeddings)
print(item_embeddings.shape)
old_item_embeddings= item_embeddings

tensor([[-2.9757e-01,  3.9954e-01, -1.5570e-01,  ..., -6.5423e-01,
          3.8565e-01,  1.0043e+00],
        [-2.3310e-01,  4.5532e-01, -3.9908e-01,  ..., -3.0845e-01,
          3.2284e-01,  3.0605e-01],
        [-2.8209e-01,  6.5010e-01, -4.7004e-01,  ..., -4.9673e-01,
          3.5249e-01,  7.1333e-01],
        ...,
        [ 2.9734e-02,  9.7946e-03,  1.2599e-01,  ...,  1.7633e-02,
         -1.6950e-02,  2.4318e-02],
        [ 1.5419e-01,  1.4447e-02,  1.1107e-01,  ...,  4.3029e-02,
          8.9812e-03,  9.2749e-02],
        [ 7.6198e-02, -9.7442e-04,  1.9572e-01,  ..., -3.4445e-02,
         -3.1626e-02, -6.3794e-02]], grad_fn=<IndexBackward0>)
torch.Size([9724, 32])


In [None]:
def evaluate_gcn_model(model, user_embeddings, item_embeddings, interaction_matrix):
    """Print the RMSE between reconstructed and actual interactions.

    The reconstruction is ``user_embeddings @ item_embeddings.T``; the target
    is ``interaction_matrix.values`` as a float32 tensor.  Nothing is returned.
    """
    model.eval()  # evaluation mode
    with torch.no_grad():
        target = torch.tensor(interaction_matrix.values, dtype=torch.float32)
        predicted = user_embeddings @ item_embeddings.T
        mse = mean_squared_error(target, predicted)
        rmse = np.sqrt(mse)
        print(f'RMSE: {rmse:.4f}')

In [None]:
def predict_new_user_rating(item_embeddings, masked_array, num_users=None):
    """Predict ratings for a user known only through a partial rating vector.

    The user's embedding is the rating-weighted average of the item
    embeddings; predictions are the dot product of every item embedding with
    that average.

    Parameters
    ----------
    item_embeddings : torch.Tensor
        One row per item.
    masked_array : array-like
        One rating per item, with 0 for items that are hidden or unrated.
    num_users : optional
        Unused; kept so existing calls remain valid.  The default no longer
        captures a notebook global at definition time.

    Returns
    -------
    np.ndarray
        One predicted rating per item.  All zeros when ``masked_array`` sums
        to zero, since there is nothing to average.
    """
    # .cpu() lets this work for embeddings that live on a GPU as well.
    item_embeddings = item_embeddings.detach().cpu().numpy()
    masked_array = np.array(masked_array, dtype=np.float32).reshape(-1, 1)  # Shape: (num_items, 1)

    sum_of_weights = np.sum(masked_array)
    if sum_of_weights == 0:
        # Avoid 0/0 -> NaN predictions for a user with no visible ratings.
        return np.zeros(
            item_embeddings.shape[0],
            dtype=np.result_type(item_embeddings, masked_array),
        )

    weighted_sum = np.sum(item_embeddings * masked_array, axis=0)
    new_user_embedding = weighted_sum / sum_of_weights
    return np.dot(item_embeddings, new_user_embedding)

In [None]:
def prediction_test(num_users, interaction_array, item_embeddings):
    """Mean per-user RMSE over the last ``num_users`` rows of ``interaction_array``.

    For each of those rows, a random half of the non-zero ratings is kept and
    the rest is zeroed; ``predict_new_user_rating`` predicts from the kept
    half and the prediction is compared with the complete row.
    """
    total_rows = len(interaction_array)
    per_user_rmse = []
    for row_idx in range(total_rows - num_users, total_rows):
        full_row = interaction_array[row_idx]

        # Keep a random half of the rated items, hide the others.
        rated = np.nonzero(full_row)[0]
        kept = np.random.choice(rated, size=len(rated) // 2, replace=False)
        visible_row = np.zeros_like(full_row)
        visible_row[kept] = full_row[kept]

        predicted = predict_new_user_rating(item_embeddings, visible_row)
        per_user_rmse.append(np.sqrt(np.mean(np.square(predicted - full_row))))
    return sum(per_user_rmse) / num_users

In [None]:
# Evaluate on training data
evaluate_gcn_model(model1, user_embeddings, item_embeddings,  interaction_matrix)

RMSE: 0.6019


Testing for RECnormal

In [None]:
print(prediction_test(50, interaction_array, old_item_embeddings))

1.1991706365211101


In [None]:
# normal_test = interaction_array[:50]

In [None]:
# num_values_to_keep = 20
# non_zero_indices = np.nonzero(normal_test)[0]
# selected_indices = np.random.choice(non_zero_indices, size=num_values_to_keep, replace=False)
# masked_array = np.zeros_like(normal_test)
# masked_array[selected_indices] = normal_test[selected_indices]
# print(masked_array)

In [None]:
# prediction = predict_new_user_rating(old_item_embeddings, masked_array)

In [None]:
# rmse = np.sqrt(np.mean((prediction - normal_test) ** 2))
# print("RMSE:", rmse)

In [None]:
# get the adversarial exmaples
interaction_array_manipulated = adversarial_manipulation(user_embeddings, item_embeddings, interaction_array, model1, 5)

The current Payoff is: 129.5894561508261
The current Payoff is: 187.35015118447063
The current Payoff is: 120.83751022372473
The current Payoff is: 187.35444951561834
The current Payoff is: 135.5049618286381
The current Payoff is: 142.10198282994554
The current Payoff is: 159.49090621273086
The current Payoff is: 125.21516606879946
The current Payoff is: 181.5549168362805
The current Payoff is: 142.11454471616645
[array([4.        , 4.00888889, 3.01777778, ..., 0.01333333, 0.00888889,
       0.        ]), array([4.00888889, 4.01777778, 3.        , ..., 0.04444444, 0.03111111,
       0.        ]), array([3.00444444, 1.52222222, 4.51777778, ..., 0.03111111, 0.        ,
       0.        ]), array([4.02222222, 0.00444444, 4.00888889, ..., 0.        , 0.00444444,
       0.        ]), array([3.00888889, 1.52222222, 4.51333333, ..., 0.01333333, 0.02666667,
       0.        ])]


### Phase 2

In [None]:
# Row (user) and column (item) positions of every non-zero entry
user_indices, item_indices = interaction_array_manipulated.nonzero()

# Rating values at those positions in the manipulated matrix
# (this rebinds the notebook-level name `ratings`)
ratings = interaction_array_manipulated[user_indices, item_indices]

# Long-format table with the same index/rating columns as the original ratings
reconstructed_ratings = pd.DataFrame(
    dict(user_idx=user_indices, item_idx=item_indices, rating=ratings)
)

# Print to verify
print(reconstructed_ratings)

        user_idx  item_idx    rating
0              0         0  4.000000
1              0         1  4.000000
2              0         2  4.000000
3              0         3  5.000000
4              0         4  5.000000
...          ...       ...       ...
149444       614      9718  0.031111
149445       614      9719  0.035556
149446       614      9720  0.013333
149447       614      9721  0.013333
149448       614      9722  0.026667

[149449 rows x 3 columns]


In [None]:
ratings = reconstructed_ratings

In [None]:
train_size = int(len(ratings))
val_size = int(0 * len(ratings))
train_dataset, val_dataset = torch.utils.data.random_split(ratings, [train_size, val_size])

In [None]:
train_loader = DataLoader(UserItemDataset(ratings.iloc[train_dataset.indices]), batch_size=32, shuffle=True)

In [None]:
# Distinct user and item indices present in the manipulated ratings
users = ratings['user_idx'].unique()
items = ratings['item_idx'].unique()

# Re-number them as contiguous 0-based indices (used for embedding)
# NOTE(review): train_loader was built from `ratings` before this
# re-indexing; confirm the mapping is the identity for this data.
user_to_idx = dict(zip(users, range(len(users))))
item_to_idx = dict(zip(items, range(len(items))))

# Overwrite the index columns with the re-numbered values
ratings['user_idx'] = ratings['user_idx'].map(user_to_idx)
ratings['item_idx'] = ratings['item_idx'].map(item_to_idx)

In [None]:
num_users = len(users)
num_items = len(items)
embedding_size = 50  # This is a tunable hyperparameter

In [None]:
mf_model2 = MFModel(num_users, num_items, embedding_size).to(device)
optimizer = optim.Adam(mf_model2.parameters(), lr=0.001)  # Adam optimizer
loss_fn = nn.MSELoss()  # Loss function (Mean Squared Error)

In [None]:
print(num_users, num_items, mf_model2)

615 9724 MFModel(
  (user_embedding): Embedding(615, 50)
  (item_embedding): Embedding(9724, 50)
)


In [None]:
train_mf_model(mf_model2, train_loader, optimizer, loss_fn)

Epoch 1/10, Loss: 43.5596
Epoch 2/10, Loss: 24.4903
Epoch 3/10, Loss: 16.6059
Epoch 4/10, Loss: 11.5849
Epoch 5/10, Loss: 7.5016
Epoch 6/10, Loss: 4.4021
Epoch 7/10, Loss: 2.6568
Epoch 8/10, Loss: 1.7856
Epoch 9/10, Loss: 1.3299
Epoch 10/10, Loss: 1.0686


In [None]:
with torch.no_grad():
    user_embeddings = mf_model2.user_embedding.weight.cpu().numpy()
    item_embeddings = mf_model2.item_embedding.weight.cpu().numpy()

In [None]:
print(user_embeddings.shape)
print(user_embeddings)

(615, 50)
[[ 1.1934199  -0.18560572 -0.429208   ... -0.09170704  0.66021615
  -0.70002294]
 [-0.7782157  -1.0830592   0.85988724 ...  1.0737648   1.0355741
   1.6933202 ]
 [-1.3466654  -0.81042135  1.4166676  ... -0.4928114   0.8047629
  -1.0115247 ]
 ...
 [ 0.35319883 -0.11706936 -0.14369167 ...  0.14567022 -0.06333251
  -0.13987537]
 [ 0.18323533 -0.03571222 -0.10329661 ...  0.02786149  0.03955904
  -0.1201558 ]
 [ 0.32783994 -0.07550949 -0.1614634  ...  0.15269202 -0.11671703
  -0.14155258]]


In [None]:
train_graph_data = create_graph_data(ratings,num_users,user_embeddings,item_embeddings)

torch.Size([10339, 50])
(2, 149449)
[[    0     0     0 ...   614   614   614]
 [  615   616   617 ... 10335 10336 10337]]


In [None]:
model2 = GCNModel(in_channels=embedding_size, hidden_channels=64, out_channels=32).to(device)
gcn_optimizer = optim.Adam(model2.parameters(), lr=0.01)
gcn_loss_fn = nn.MSELoss()

In [None]:
interaction_matrix_manipulated = ratings.pivot(index='user_idx', columns='item_idx', values='rating').fillna(0)

In [None]:
user_embeddings, item_embeddings= train_gcn_model(model2, train_graph_data, gcn_optimizer, gcn_loss_fn, interaction_matrix_manipulated)

Epoch 1/30, Loss: 57.9441
Epoch 2/30, Loss: 4.1436
Epoch 3/30, Loss: 7.9179
Epoch 4/30, Loss: 9.9670
Epoch 5/30, Loss: 6.1768
Epoch 6/30, Loss: 3.2390
Epoch 7/30, Loss: 1.8769
Epoch 8/30, Loss: 1.2812
Epoch 9/30, Loss: 1.0234
Epoch 10/30, Loss: 0.9083
Epoch 11/30, Loss: 0.8528
Epoch 12/30, Loss: 0.8171
Epoch 13/30, Loss: 0.7840
Epoch 14/30, Loss: 0.7502
Epoch 15/30, Loss: 0.7147
Epoch 16/30, Loss: 0.6781
Epoch 17/30, Loss: 0.6404
Epoch 18/30, Loss: 0.6031
Epoch 19/30, Loss: 0.5675
Epoch 20/30, Loss: 0.5342
Epoch 21/30, Loss: 0.5035
Epoch 22/30, Loss: 0.4758
Epoch 23/30, Loss: 0.4512
Epoch 24/30, Loss: 0.4294
Epoch 25/30, Loss: 0.4103
Epoch 26/30, Loss: 0.3935
Epoch 27/30, Loss: 0.3788
Epoch 28/30, Loss: 0.3659
Epoch 29/30, Loss: 0.3547
Epoch 30/30, Loss: 0.3449


In [None]:
evaluate_gcn_model(model2, user_embeddings, item_embeddings,  interaction_matrix_manipulated)

RMSE: 0.5872


Compare the rmse for all 3 models calculated by predicting ratings for a new user

In [None]:
# adversarial_interactions= interaction_array_manipulated[610:]
# adversarial_interactions.shape

Testing for RECsecure

In [None]:
print(prediction_test(50, interaction_array_manipulated, item_embeddings))

1.2668284680719553


In [None]:
# adversarial_test = adversarial_interactions[np.random.choice(adversarial_interactions.shape[0])]
# num_values_to_keep = 2000
# non_zero_indices = np.nonzero(adversarial_test)[0]
# selected_indices = np.random.choice(non_zero_indices, size=num_values_to_keep, replace=False)
# masked_array = np.zeros_like(adversarial_test)
# masked_array[selected_indices] = adversarial_test[selected_indices]
# print(masked_array)

In [None]:
# prediction = predict_new_user_rating(item_embeddings, masked_array)

In [None]:
# rmse = np.sqrt(np.mean((prediction - adversarial_test) ** 2))
# print("RMSE:", rmse)

Testing for RECmani

In [None]:
print(prediction_test(50, interaction_array_manipulated, old_item_embeddings))

1.2324665011308187


In [None]:
# prediction = predict_new_user_rating(old_item_embeddings, masked_array)

In [None]:
# rmse = np.sqrt(np.mean((prediction - adversarial_test) ** 2))
# print("RMSE:", rmse)