In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import cv2
import warnings
import copy
import random
from keras import layers, models
from keras.utils import to_categorical
from keras.datasets import mnist
from keras.optimizers import Adam
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
!pip install torch_geometric
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import zipfile
import requests
from io import BytesIO

In [None]:
# Set device: use GPU if available, otherwise fallback to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define base path to save trained models and other outputs
# NOTE(review): placeholder value; no visible cell reads save_base_path — set a real path before using it.
save_base_path = "/path/to/save"

In [None]:
url = 'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip'

# Download the dataset. The timeout keeps a stalled connection from hanging the
# kernel, and raise_for_status() surfaces HTTP errors here instead of as a
# confusing BadZipFile error on the next line.
response = requests.get(url, timeout=60)
response.raise_for_status()
zip_file = zipfile.ZipFile(BytesIO(response.content))

# Extract the ratings and movies CSV files (member handles are closed after reading)
with zip_file.open('ml-latest-small/ratings.csv') as ratings_file:
    ratings = pd.read_csv(ratings_file)
with zip_file.open('ml-latest-small/movies.csv') as movies_file:
    movies = pd.read_csv(movies_file)

# Preview the datasets
print(ratings.head())
print(movies.head())

In [None]:
# Distinct raw user / movie IDs present in the ratings table
users = ratings['userId'].unique()
items = ratings['movieId'].unique()

# Dense 0-based index for every raw ID (its position in the arrays above);
# these indices address rows of the embedding tables.
user_to_idx = dict(zip(users, range(len(users))))
item_to_idx = dict(zip(items, range(len(items))))

# Attach the dense indices to every rating row
ratings['user_idx'] = ratings['userId'].map(user_to_idx)
ratings['item_idx'] = ratings['movieId'].map(item_to_idx)

In [None]:
# Inspect the ratings frame, which now also carries the user_idx / item_idx columns.
print(ratings)

In [None]:
# Number of distinct users and items found in the ratings data.
print(users.size)
print(items.size)

In [None]:
# User x item rating table (rows: user_idx, columns: item_idx); pairs without a rating are filled with 0.
interaction_matrix = ratings.pivot(index='user_idx', columns='item_idx', values='rating').fillna(0)

In [None]:
# NumPy copy of the pivoted rating table, used later for the masking-based prediction test.
interaction_array= np.array(interaction_matrix)
print(interaction_array)
print('matrix dimensions : ', interaction_array.shape)

In [None]:
class UserItemDataset(Dataset):
    """Map-style dataset yielding one (user_idx, item_idx, rating) sample per row.

    Args:
        ratings: DataFrame with 'user_idx', 'item_idx' and 'rating' columns.

    Each sample is a dict with 0-d tensors: 'user_idx' and 'item_idx' as
    torch.long, 'rating' as torch.float.
    """

    def __init__(self, ratings):
        self.ratings = ratings
        # Convert the three columns to tensors once. Building a pandas row per
        # sample (DataFrame.iloc inside __getitem__) is very slow and also
        # round-trips the integer indices through a float row.
        self._user_idx = torch.from_numpy(ratings['user_idx'].to_numpy(dtype=np.int64, copy=True))
        self._item_idx = torch.from_numpy(ratings['item_idx'].to_numpy(dtype=np.int64, copy=True))
        self._rating = torch.from_numpy(ratings['rating'].to_numpy(dtype=np.float32, copy=True))

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        # idx is positional, matching the original iloc-based lookup.
        return {
            'user_idx': self._user_idx[idx],
            'item_idx': self._item_idx[idx],
            'rating': self._rating[idx],
        }

# Split bookkeeping: every rating goes to the training split and the validation
# split is empty (val_size is 0), so no data is held out here.
train_size = int(len(ratings))
val_size = int(0 * len(ratings))
# test_size = len(ratings) - train_size - val_size

# With an empty second split, random_split just yields a random permutation of all row positions.
train_dataset, val_dataset = torch.utils.data.random_split(ratings, [train_size, val_size])

# Create data loaders for batching
train_loader = DataLoader(UserItemDataset(ratings.iloc[train_dataset.indices]), batch_size=32, shuffle=True)
# val_loader = DataLoader(UserItemDataset(ratings.iloc[val_dataset.indices]), batch_size=32, shuffle= True)
# test_loader = DataLoader(UserItemDataset(ratings.iloc[test_dataset.indices]), batch_size=32, shuffle=False)

In [None]:
class MFModel(nn.Module):
    """Matrix-factorisation scorer.

    Holds one learnable vector per user and per item; the score of a
    (user, item) pair is the dot product of their vectors.
    """

    def __init__(self, num_users, num_items, embedding_size):
        super().__init__()
        # Lookup tables: row i is the embedding_size-wide vector of user / item i.
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.item_embedding = nn.Embedding(num_items, embedding_size)

    def forward(self, user_ids, item_ids):
        """Return one score per (user_ids[k], item_ids[k]) pair."""
        user_vecs = self.user_embedding(user_ids)
        item_vecs = self.item_embedding(item_ids)
        # Element-wise product reduced over the embedding axis == row-wise dot product.
        return torch.sum(user_vecs * item_vecs, dim=1)

# Sizes used to build the model: number of users, number of items, and the embedding width
num_users = len(users)
num_items = len(items)
embedding_size = 50  # This is a tunable hyperparameter

In [None]:
# Phase-1 matrix-factorisation model, placed on the selected device, with its optimizer and loss.
mf_model1 = MFModel(num_users, num_items, embedding_size).to(device)
optimizer = optim.Adam(mf_model1.parameters(), lr=0.001)  # Adam optimizer
loss_fn = nn.MSELoss()  # Loss function (Mean Squared Error)

In [None]:
def train_mf_model(model, train_loader, optimizer, criterion, num_epochs=10):
    """Fit `model` on the batches of `train_loader` and log the mean loss per epoch.

    Each batch is a dict with 'user_idx', 'item_idx' and 'rating' tensors; they
    are moved to the notebook-level `device` before the forward pass.
    """
    model.train()
    for epoch_number in range(1, num_epochs + 1):
        batch_losses = []
        for batch in train_loader:
            user_ids, item_ids, targets = (
                batch[key].to(device) for key in ('user_idx', 'item_idx', 'rating')
            )

            # Standard step: clear grads, forward, loss, backward, update.
            optimizer.zero_grad()
            loss = criterion(model(user_ids, item_ids), targets)
            loss.backward()
            optimizer.step()
            batch_losses.append(loss.item())

        epoch_loss = sum(batch_losses) / len(train_loader)
        print(f'Epoch {epoch_number}/{num_epochs}, Loss: {epoch_loss:.4f}')

In [None]:
# Train the phase-1 matrix-factorisation model with the default number of epochs.
train_mf_model(mf_model1, train_loader, optimizer, loss_fn)

In [None]:
# Export the learned embedding tables as NumPy arrays; they seed the graph node features below.
with torch.no_grad():
    user_embeddings = mf_model1.user_embedding.weight.cpu().numpy()
    item_embeddings = mf_model1.item_embedding.weight.cpu().numpy()

In [None]:
def create_graph_data(ratings,num_users,user_embeddings,item_embeddings, undirected=False):
    """Build the user-item graph consumed by the GCN.

    Nodes 0..num_users-1 are users; item i becomes node num_users + i. Node
    features are the user embeddings stacked on top of the item embeddings.

    Args:
        ratings: DataFrame with integer 'user_idx' and 'item_idx' columns; one
            edge is created per row.
        num_users: Offset added to item indices to obtain item node ids.
        user_embeddings: Array-like of user feature rows.
        item_embeddings: Array-like of item feature rows.
        undirected: When True, every user->item edge is mirrored as
            item->user. The default (False) keeps the original user->item-only
            edges. GCNConv aggregates along source->target by default, so with
            the default setting user nodes receive no messages from items.

    Returns:
        A torch_geometric Data object with `x` and `edge_index`.
    """
    # Take an explicit copy before offsetting: modifying `.values` in place can
    # write through to the DataFrame or fail on a read-only array.
    user_item_edges = ratings[['user_idx', 'item_idx']].to_numpy(dtype=np.int64, copy=True).T

    user_item_edges[1] += num_users

    if undirected:
        # Append the reversed (item -> user) copy of every edge.
        user_item_edges = np.concatenate([user_item_edges, user_item_edges[::-1]], axis=1)

    # Create edge index (format required by torch_geometric)
    edge_index = torch.tensor(user_item_edges, dtype=torch.long)

    # Concatenate user and item embeddings to form node features
    node_features = torch.cat([torch.tensor(user_embeddings, dtype=torch.float), torch.tensor(item_embeddings, dtype=torch.float)], dim=0)

    print(node_features.shape)
    print(user_item_edges.shape)
    print(user_item_edges)

    # Create the PyTorch Geometric data object (x: node features, edge_index: graph edges)
    train_graph_data = Data(x=node_features, edge_index=edge_index)
    return train_graph_data

In [None]:
# Phase-1 graph: edges from the original ratings, node features from the MF embeddings.
train_graph_data = create_graph_data(ratings,num_users,user_embeddings,item_embeddings)

In [None]:
class GCNModel(nn.Module):
    """Two stacked GCNConv layers with a ReLU in between.

    `forward` takes a graph object exposing `x` (node features) and
    `edge_index`, and returns one `out_channels`-wide vector per node.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)   # input -> hidden
        self.conv2 = GCNConv(hidden_channels, out_channels)  # hidden -> output

    def forward(self, data):
        node_features = data.x
        edges = data.edge_index
        hidden = torch.relu(self.conv1(node_features, edges))
        return self.conv2(hidden, edges)

In [None]:
# Phase-1 GCN: input width equals the MF embedding size; 64 hidden and 32 output channels.
model1 = GCNModel(in_channels=embedding_size, hidden_channels=64, out_channels=32).to(device)

In [None]:
# Optimizer and loss for the phase-1 GCN.
gcn_optimizer = optim.Adam(model1.parameters(), lr=0.01)
gcn_loss_fn = nn.MSELoss()

In [None]:
def train_gcn_model(model, train_graph, optimizer, criterion, interaction_matrix,num_epochs=30):
    """Train the GCN so that user/item embedding dot products match the rating table.

    Args:
        model: Module mapping `train_graph` to one embedding row per node.
        train_graph: Graph whose first rows are user nodes, followed by item
            nodes (the layout produced by create_graph_data). Must support
            `.to(device)`.
        optimizer: Optimizer over `model`'s parameters.
        criterion: Loss comparing the predicted and target rating matrices.
        interaction_matrix: DataFrame whose index holds integer user indices,
            whose columns hold integer item indices, and whose values are the
            target ratings.
        num_epochs: Number of full-graph optimisation steps.

    Returns:
        (user_embeddings, item_embeddings) from the last epoch's forward pass,
        detached and on the CPU. Two empty lists if num_epochs is 0.

    NOTE(review): the target includes the 0 entries of unrated pairs, so the
    model is also trained to predict 0 for them — confirm this is intended.
    """
    model.train()  # Set model to training mode

    # Keep graph, indices and target on the model's device; otherwise the
    # forward pass fails as soon as the model lives on a GPU.
    model_device = next(model.parameters()).device
    train_graph = train_graph.to(model_device)

    # Read the user/item layout from the interaction matrix itself instead of
    # the notebook-level `ratings` / `num_users`, which later cells rebind.
    user_indices = torch.as_tensor(np.asarray(interaction_matrix.index), dtype=torch.long, device=model_device)
    item_indices = torch.as_tensor(np.asarray(interaction_matrix.columns), dtype=torch.long, device=model_device)
    item_offset = len(user_indices)  # item nodes follow the user nodes in the graph

    # The target never changes, so build it once rather than once per epoch.
    target = torch.tensor(interaction_matrix.values, dtype=torch.float32, device=model_device)

    user_embed=[]
    item_embed=[]
    for epoch in range(num_epochs):
        optimizer.zero_grad()  # Zero the gradients
        output = model(train_graph)  # Forward pass through the GCN

        user_embeddings = output[user_indices]
        item_embeddings = output[item_indices + item_offset]  # Shift by the user count for items

        # Predicted rating of every user for every item (all pairwise dot products)
        predicted_ratings = torch.matmul(user_embeddings, item_embeddings.T)

        loss = criterion(predicted_ratings, target)
        loss.backward()  # Backpropagation
        optimizer.step()  # Gradient descent step

        # Hand back plain CPU tensors so callers never hold the autograd graph
        # and can convert to NumPy regardless of the training device.
        user_embed = user_embeddings.detach().cpu()
        item_embed = item_embeddings.detach().cpu()

        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}')

    return user_embed, item_embed

In [None]:
# Train the phase-1 GCN; this rebinds user_embeddings / item_embeddings to the GCN outputs.
user_embeddings, item_embeddings= train_gcn_model(model1, train_graph_data, gcn_optimizer, gcn_loss_fn, interaction_matrix)

In [None]:
# Inspect the GCN user embeddings and their shape.
print(user_embeddings)
print(user_embeddings.shape)

In [None]:
# Inspect the GCN item embeddings, then keep a reference to them under a second name:
# item_embeddings is rebound in phase 2, while old_item_embeddings is used by the later comparison cells.
print(item_embeddings)
print(item_embeddings.shape)
old_item_embeddings= item_embeddings

In [None]:
def evaluate_gcn_model(model, user_embeddings, item_embeddings, interaction_matrix):
    """Print and return the RMSE between predicted and actual rating matrices.

    Args:
        model: Only switched to evaluation mode; the embeddings are passed in.
        user_embeddings: Tensor with one row per user.
        item_embeddings: Tensor with one row per item.
        interaction_matrix: DataFrame of target ratings (users x items).

    Returns:
        The RMSE over every cell of the matrix as a Python float (cells holding
        0 count as targets too).

    Raises:
        ValueError: If the prediction and target shapes differ.
    """
    model.eval()  # Set model to evaluation mode
    with torch.no_grad():
        predicted_ratings = torch.matmul(user_embeddings, item_embeddings.T)
        # Build the target on the predictions' device and stay in torch, so
        # this also works when the embeddings live on a GPU.
        target = torch.tensor(interaction_matrix.values, dtype=torch.float32, device=predicted_ratings.device)
        if predicted_ratings.shape != target.shape:
            raise ValueError(
                f"prediction shape {tuple(predicted_ratings.shape)} does not match "
                f"target shape {tuple(target.shape)}"
            )
        rmse = torch.sqrt(torch.mean((predicted_ratings - target) ** 2)).item()

    print(f'RMSE: {rmse:.4f}')
    return rmse

In [None]:
def predict_new_user_rating(item_embeddings, masked_array, num_users=None):
    """Score every item for a user described only by a partial rating vector.

    The user's embedding is the rating-weighted average of the embeddings of
    the items present in `masked_array`; each item's score is the dot product
    of its embedding with that user embedding.

    Args:
        item_embeddings: Tensor or array with one row per item.
        masked_array: Length-num_items vector of known ratings, 0 where unknown.
        num_users: Unused; kept so existing calls keep working.

    Returns:
        1-D NumPy array with one score per item. All zeros when the known
        ratings sum to 0, since no user embedding can be formed and dividing
        would only produce NaNs.
    """
    if isinstance(item_embeddings, torch.Tensor):
        # .cpu() makes this work for embeddings that still live on a GPU.
        item_embeddings = item_embeddings.detach().cpu().numpy()
    else:
        item_embeddings = np.asarray(item_embeddings, dtype=np.float32)

    masked_array = np.array(masked_array, dtype=np.float32).reshape(-1, 1)  # Shape: (num_items, 1)

    weighted_sum = np.sum(item_embeddings * masked_array, axis=0)
    sum_of_weights = np.sum(masked_array)
    if sum_of_weights == 0:
        return np.zeros(item_embeddings.shape[0], dtype=item_embeddings.dtype)

    new_user_embedding = weighted_sum / sum_of_weights
    predicted_ratings = np.dot(item_embeddings, new_user_embedding)

    return predicted_ratings

In [None]:
def prediction_test(num_users, interaction_array, item_embeddings, seed=None):
    """Average per-user RMSE when half of each test user's ratings are hidden.

    The last `num_users` rows of `interaction_array` act as test users. For
    each one, a random half of the non-zero ratings is kept, predictions are
    made from that half via predict_new_user_rating, clipped to [0, 5], and
    compared with the user's full row (zeros included).

    Args:
        num_users: Number of trailing rows to evaluate; 1..len(interaction_array).
        interaction_array: 2-D array of ratings (users x items), 0 = no rating.
        item_embeddings: Item embeddings passed on to predict_new_user_rating.
        seed: Optional seed for the random masking. None (default) keeps using
            NumPy's global random state.

    Returns:
        Mean of the per-user RMSE values.

    Raises:
        ValueError: If num_users is not between 1 and the number of rows.
    """
    total_users = len(interaction_array)
    # A larger num_users would make the start index negative and silently wrap
    # around to rows from the end; zero would divide by zero below.
    if not 0 < num_users <= total_users:
        raise ValueError(f"num_users must be between 1 and {total_users}, got {num_users}")

    rng = np.random if seed is None else np.random.default_rng(seed)

    metric=0
    for i in range(total_users - num_users, total_users):
        normal_test = interaction_array[i]
        non_zero_indices = np.nonzero(normal_test)[0]
        num_values_to_keep = len(non_zero_indices) // 2
        selected_indices = rng.choice(non_zero_indices, size=num_values_to_keep, replace=False)
        # Keep only the selected ratings; everything else is hidden as 0.
        masked_array = np.zeros_like(normal_test)
        masked_array[selected_indices] = normal_test[selected_indices]
        prediction = predict_new_user_rating(item_embeddings, masked_array)
        prediction= np.clip(prediction, 0, 5)
        rmse = np.sqrt(np.mean((prediction - normal_test) ** 2))
        metric+= rmse
    return metric/num_users

In [None]:
# RMSE of the phase-1 GCN embeddings against the original interaction matrix.
evaluate_gcn_model(model1, user_embeddings, item_embeddings,  interaction_matrix)

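RECNORMAL — baseline: item embeddings trained on the original interactions, tested on the original interaction matrix.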

In [None]:
# Masked-rating test on the last 50 users of the original matrix, using the phase-1 item embeddings.
print(prediction_test(50, interaction_array, old_item_embeddings))

# PHASE 2

In [None]:
# Encoder: Maps interaction matrix to latent space
class Encoder(nn.Module):
    """Two-layer MLP compressing an `input_dim`-wide row to `latent_dim` values.

    Layout: Linear(input_dim, 128) -> ReLU -> Linear(128, latent_dim).
    """

    def __init__(self, input_dim, latent_dim):
        super().__init__()
        hidden_width = 128
        stages = [
            nn.Linear(input_dim, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, latent_dim),
        ]
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        """Encode `x`; the last dimension must be `input_dim`."""
        return self.model(x)

In [None]:
# Generator: Generates perturbed interactions
class Generator(nn.Module):
    """Two-layer MLP expanding a latent code into a perturbation row.

    Layout: Linear(latent_dim, 128) -> ReLU -> Linear(128, output_dim) -> Tanh,
    so every output value lies in [-1, 1].
    """

    def __init__(self, latent_dim, output_dim):
        super().__init__()
        hidden_width = 128
        stages = [
            nn.Linear(latent_dim, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, output_dim),
            nn.Tanh(),  # squashes the perturbations into [-1, 1]
        ]
        self.model = nn.Sequential(*stages)

    def forward(self, z):
        """Decode `z`; the last dimension must be `latent_dim`."""
        return self.model(z)

In [None]:
def train_encoder_generator(encoder, generator, interaction_matrix, num_epochs=20, batch_size=61, lr=0.001, lambda_reg=0.1):
    """Jointly train the encoder and generator to produce rating perturbations.

    For each batch of rows, the encoder output is fed to the generator, whose
    output is added to the batch and clamped to [0, 5]. The loss is the MSE
    between the perturbed and original rows plus `lambda_reg` times the L2
    norm of the perturbations.

    Args:
        encoder: Module mapping a batch of rows to latent codes.
        generator: Module mapping latent codes to perturbations shaped like the rows.
        interaction_matrix: 2-D float tensor (users x items).
        num_epochs: Number of passes over the matrix.
        batch_size: Rows per batch; the last batch of an epoch may be smaller.
        lr: Learning rate of both Adam optimizers.
        lambda_reg: Weight of the perturbation-norm penalty.

    Returns:
        The (encoder, generator) pair, trained in place.
    """
    e_optimizer = torch.optim.Adam(encoder.parameters(), lr=lr)
    g_optimizer = torch.optim.Adam(generator.parameters(), lr=lr)
    mse_loss = nn.MSELoss()

    num_rows = len(interaction_matrix)
    for epoch in range(num_epochs):
        loss = None
        for start in range(0, num_rows, batch_size):
            # Slice the batch. `batch_size` itself is never reassigned, so a
            # short final batch cannot change the stride of later epochs.
            real_data = interaction_matrix[start:start + batch_size]

            # Encode real data to latent space
            latent_real = encoder(real_data)

            # Generate perturbations
            perturbations = generator(latent_real)

            # Create perturbed matrix
            perturbed_data = real_data + perturbations
            perturbed_data = torch.clamp(perturbed_data, 0, 5)  # Clip to valid range [0, 5]

            # Loss: Reconstruction + Regularization
            recon_loss = mse_loss(perturbed_data, real_data)
            reg_loss = lambda_reg * torch.norm(perturbations, p=2)
            loss = recon_loss + reg_loss

            # Backward and optimization
            e_optimizer.zero_grad()
            g_optimizer.zero_grad()
            loss.backward()
            e_optimizer.step()
            g_optimizer.step()

        # Reports the loss of the epoch's last batch; skipped when there was no batch.
        if loss is not None:
            print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}")

    return encoder, generator

In [None]:
# Rebuild the interaction matrix as a float32 tensor for the encoder/generator.
# NOTE(review): this rebinds `interaction_matrix`, which until now held the pivoted DataFrame;
# earlier cells that call `.values` on it will fail if re-run after this point.
interaction_matrix= torch.tensor(interaction_array, dtype=torch.float32)
print(interaction_matrix)
# The tensor is already float32 from the line above, so this cast does not change its dtype.
interaction_matrix = interaction_matrix.float()

In [None]:
# Item count taken from the tensor's second dimension (rebinds the earlier num_items), and the latent width.
num_items = interaction_matrix.size(1)
latent_dim = 8

# Initialize encoder and generator
encoder = Encoder(input_dim=num_items, latent_dim=latent_dim)
generator = Generator(latent_dim=latent_dim, output_dim=num_items)

In [None]:
# Train the encoder and generator
# (overrides the function defaults: 10 epochs, lr=0.01, lambda_reg=1)
encoder, generator = train_encoder_generator(
    encoder, generator, interaction_matrix, num_epochs=10, batch_size=61, lr=0.01, lambda_reg=1
)


In [None]:
# Generate perturbed interactions
with torch.no_grad():
    latent_real = encoder(interaction_matrix)
    perturbations = generator(latent_real)
    # NOTE(review): the perturbations are scaled by 10 here, whereas training added them unscaled —
    # confirm the factor is intended. The result is clamped to the [0, 5] rating range.
    perturbed_matrix = torch.clamp(interaction_matrix + perturbations*10, 0, 5)

print("Original Interaction Matrix:")
print(interaction_matrix)

print("Perturbed Interaction Matrix:")
print(perturbed_matrix)

In [None]:
# NumPy copy of the perturbed matrix, used to rebuild a ratings table and for the later prediction tests.
interaction_array_manipulated= np.array(perturbed_matrix)

In [None]:
user_indices, item_indices = np.nonzero(interaction_array_manipulated)  # Get indices of non-zero elements

# Retrieve the corresponding ratings from interaction_array_manipulated.
# Stored under its own name: binding this array to `ratings` would replace the
# ratings DataFrame that other cells and functions read.
rating_values = interaction_array_manipulated[user_indices, item_indices]

# Create a DataFrame similar to the original ratings DataFrame
reconstructed_ratings = pd.DataFrame({
    'user_idx': user_indices,
    'item_idx': item_indices,
    'rating': rating_values
})

# Print to verify
print(reconstructed_ratings)

In [None]:
# From here on `ratings` refers to the reconstructed (perturbed) frame — the same object, not a copy,
# so in-place edits through either name affect both.
ratings = reconstructed_ratings

In [None]:
# Same split bookkeeping as in phase 1: all rows go to training, the validation split is empty.
train_size = int(len(ratings))
val_size = int(0 * len(ratings))
train_dataset, val_dataset = torch.utils.data.random_split(ratings, [train_size, val_size])

In [None]:
# NOTE(review): the `.iloc[...]` selection is taken at this point, so this loader holds the
# user_idx / item_idx values as they are now, before the re-indexing done in the next cell.
train_loader = DataLoader(UserItemDataset(ratings.iloc[train_dataset.indices]), batch_size=32, shuffle=True)

In [None]:
# Sorted distinct indices. Sorting keeps the new numbering in the same order as
# the columns of interaction_array_manipulated (and it is the identity when no
# user/item disappeared). First-appearance order would permute the item indices
# relative to the matrix that the embeddings are later tested against.
users = np.sort(ratings['user_idx'].unique())
items = np.sort(ratings['item_idx'].unique())

# Create mappings from user/item IDs to indices (used for embedding)
user_to_idx = {user: idx for idx, user in enumerate(users)}
item_to_idx = {item: idx for idx, item in enumerate(items)}

# Convert user and item IDs in ratings to indices
ratings['user_idx'] = ratings['user_idx'].map(user_to_idx)
ratings['item_idx'] = ratings['item_idx'].map(item_to_idx)

# Rebuild the loader from the re-indexed frame: the loader created before this
# cell holds a snapshot with the old indices, which may not match the embedding
# tables sized from len(users) / len(items). shuffle=True still randomises order.
# NOTE(review): if any item column of the perturbed matrix is entirely zero,
# len(items) is smaller than the matrix width and the later prediction tests on
# interaction_array_manipulated will not line up — verify before relying on them.
train_loader = DataLoader(UserItemDataset(ratings), batch_size=32, shuffle=True)

In [None]:
# Recompute the model sizes from the phase-2 user/item sets (rebinds the phase-1 values).
num_users = len(users)
num_items = len(items)
embedding_size = 50  # This is a tunable hyperparameter

In [None]:
# Phase-2 matrix-factorisation model with a fresh optimizer and loss (rebinds `optimizer` / `loss_fn`).
mf_model2 = MFModel(num_users, num_items, embedding_size).to(device)
optimizer = optim.Adam(mf_model2.parameters(), lr=0.001)  # Adam optimizer
loss_fn = nn.MSELoss()  # Loss function (Mean Squared Error)

In [None]:
# Sanity check of the phase-2 sizes and model layout.
print(num_users, num_items, mf_model2)

In [None]:
# Train the phase-2 matrix-factorisation model on the current train_loader.
train_mf_model(mf_model2, train_loader, optimizer, loss_fn)

In [None]:
# Export the phase-2 embedding tables as NumPy arrays (rebinds user_embeddings / item_embeddings).
with torch.no_grad():
    user_embeddings = mf_model2.user_embedding.weight.cpu().numpy()
    item_embeddings = mf_model2.item_embedding.weight.cpu().numpy()

In [None]:
# Inspect the phase-2 MF user embeddings.
print(user_embeddings.shape)
print(user_embeddings)

In [None]:
# Phase-2 graph: edges from the reconstructed ratings, node features from the phase-2 MF embeddings.
train_graph_data = create_graph_data(ratings,num_users,user_embeddings,item_embeddings)

In [None]:
# Phase-2 GCN with the same layer sizes as phase 1, plus a fresh optimizer and loss.
model2 = GCNModel(in_channels=embedding_size, hidden_channels=64, out_channels=32).to(device)
gcn_optimizer = optim.Adam(model2.parameters(), lr=0.01)
gcn_loss_fn = nn.MSELoss()

In [None]:
# User x item table pivoted from the reconstructed ratings; missing pairs are filled with 0.
interaction_matrix_manipulated = ratings.pivot(index='user_idx', columns='item_idx', values='rating').fillna(0)

In [None]:
# Train the phase-2 GCN against the manipulated interaction table.
user_embeddings, item_embeddings= train_gcn_model(model2, train_graph_data, gcn_optimizer, gcn_loss_fn, interaction_matrix_manipulated)

In [None]:
# RMSE of the phase-2 GCN embeddings against the manipulated interaction table.
evaluate_gcn_model(model2, user_embeddings, item_embeddings,  interaction_matrix_manipulated)

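RECSECURE — item embeddings trained on the perturbed interactions, tested on the perturbed interaction matrix.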

In [None]:
# Masked-rating test on the last 50 users of the perturbed matrix, using the phase-2 item embeddings.
# NOTE(review): this assumes row j of item_embeddings corresponds to column j of
# interaction_array_manipulated — confirm the phase-2 re-indexing preserves column order.
print(prediction_test(50, interaction_array_manipulated, item_embeddings))

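RECMANI — baseline item embeddings (trained on the original interactions), tested on the perturbed interaction matrix.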

In [None]:
# Masked-rating test on the last 50 users of the perturbed matrix, using the phase-1 item embeddings.
print(prediction_test(50, interaction_array_manipulated, old_item_embeddings))