In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
from torch_geometric.data import Data
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.loader import DataLoader
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [9]:
# Load the dataset
url = "ratings_Electronics (1).csv"
df = pd.read_csv(url)

df.head()

Unnamed: 0,AKM1MP6P0OYPR,0132793040,5.0,1365811200
0,A2CX7LUOHB2NDG,321732944,5.0,1341100800
1,A2NWSAGRHCP8N5,439886341,1.0,1367193600
2,A2WNBOD3WNDNKT,439886341,3.0,1374451200
3,A1GI0U4ZRJA8WN,439886341,1.0,1334707200
4,A1QGNMC6O1VW39,511189877,5.0,1397433600


In [10]:
# Rename Headers
df.rename(columns = {'AKM1MP6P0OYPR':'userId', '0132793040':'productId', '5.0':'Rating', '1365811200':'timestamp'}, inplace = True)

# Crop data
df = df.head(5000)

# Clean the data
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)

# Encode user IDs and item IDs
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

df['userId'] = user_encoder.fit_transform(df['userId'])
df['productId'] = item_encoder.fit_transform(df['productId'])

# Split the data into training, validation, and test sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

# Display the shape of the datasets
train_df.shape, val_df.shape, test_df.shape

((3200, 4), (800, 4), (1000, 4))

In [11]:
df.head()

Unnamed: 0,userId,productId,Rating,timestamp
0,1759,0,5.0,1341100800
1,2154,1,1.0,1367193600
2,2477,1,3.0,1374451200
3,603,1,1.0,1334707200
4,960,2,5.0,1397433600


In [12]:
# Create edge index from user-item interactions
edge_index = torch.tensor([train_df['userId'].values, train_df['productId'].values], dtype=torch.long)

# Create edge attributes (ratings)
edge_attr = torch.tensor(train_df['Rating'].values, dtype=torch.float)

# Create the PyTorch Geometric data object
data = Data(edge_index=edge_index, edge_attr=edge_attr)

# Display the data object
data

  edge_index = torch.tensor([train_df['userId'].values, train_df['productId'].values], dtype=torch.long)


Data(edge_index=[2, 3200], edge_attr=[3200])

In [13]:
num_users = df['userId'].nunique()
num_items = df['productId'].nunique()
num_nodes = num_users + num_items

# Create node features
node_features = torch.eye(num_nodes)

# Add node features to the data object
data.x = node_features

# Display the updated data object
data

Data(edge_index=[2, 3200], edge_attr=[3200], x=[5227, 5227])

In [14]:
class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.fc = torch.nn.Linear(hidden_channels * 2, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = F.relu(x)

        # Apply the final linear layer on the concatenated edge features
        edge_pred = self.fc(torch.cat([x[edge_index[0]], x[edge_index[1]]], dim=1))
        return edge_pred.squeeze()

# Initialize the model
model = GCN(in_channels=node_features.size(1), hidden_channels=16, out_channels=1)

print(model)

# Prepare the data loader
train_loader = DataLoader([data], batch_size=1, shuffle=True)

# Define the loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training loop
for epoch in range(200):
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        out = model(batch)
        loss = criterion(out, batch.edge_attr.view(-1, 1))
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch + 1}, Loss: {loss.item()}')

GCN(
  (conv1): GCNConv(5227, 16)
  (conv2): GCNConv(16, 16)
  (fc): Linear(in_features=32, out_features=1, bias=True)
)


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1, Loss: 18.06903839111328
Epoch 2, Loss: 16.7755126953125
Epoch 3, Loss: 15.148188591003418
Epoch 4, Loss: 13.088510513305664
Epoch 5, Loss: 10.806111335754395
Epoch 6, Loss: 8.563568115234375
Epoch 7, Loss: 6.776577472686768
Epoch 8, Loss: 6.0371413230896
Epoch 9, Loss: 6.550895690917969
Epoch 10, Loss: 7.1099467277526855
Epoch 11, Loss: 6.755470275878906
Epoch 12, Loss: 5.8135294914245605
Epoch 13, Loss: 4.826439380645752
Epoch 14, Loss: 4.127345561981201
Epoch 15, Loss: 3.7804179191589355
Epoch 16, Loss: 3.6783149242401123
Epoch 17, Loss: 3.6582722663879395
Epoch 18, Loss: 3.609409809112549
Epoch 19, Loss: 3.483248710632324
Epoch 20, Loss: 3.2971527576446533
Epoch 21, Loss: 3.100945472717285
Epoch 22, Loss: 2.949089765548706
Epoch 23, Loss: 2.880405902862549
Epoch 24, Loss: 2.894237756729126
Epoch 25, Loss: 2.9305813312530518
Epoch 26, Loss: 2.918001174926758
Epoch 27, Loss: 2.83248233795166
Epoch 28, Loss: 2.7012906074523926
Epoch 29, Loss: 2.5719664096832275
Epoch 30, Loss:

In [16]:
# Convert validation and test data to PyTorch Geometric format
val_edge_index = torch.tensor([val_df['userId'].values, val_df['productId'].values], dtype=torch.long)
val_edge_attr = torch.tensor(val_df['Rating'].values, dtype=torch.float)

test_edge_index = torch.tensor([test_df['userId'].values, test_df['productId'].values], dtype=torch.long)
test_edge_attr = torch.tensor(test_df['Rating'].values, dtype=torch.float)

# Create data objects for validation and test sets
val_data = Data(edge_index=val_edge_index, edge_attr=val_edge_attr, x=node_features)
test_data = Data(edge_index=test_edge_index, edge_attr=test_edge_attr, x=node_features)

# Evaluate the model
model.eval()
with torch.no_grad():
    val_out = model(val_data)
    test_out = model(test_data)

# Calculate evaluation metrics
val_rmse = mean_squared_error(val_edge_attr.numpy(), val_out.numpy())
val_mae = mean_absolute_error(val_edge_attr.numpy(), val_out.numpy())

test_rmse = mean_squared_error(test_edge_attr.numpy(), test_out.numpy())
test_mae = mean_absolute_error(test_edge_attr.numpy(), test_out.numpy())

print(f'Validation RMSE: {val_rmse}, Validation MAE: {val_mae}')
print(f'Test RMSE: {test_rmse}, Test MAE: {test_mae}')

Validation RMSE: 2.987588882446289, Validation MAE: 1.5997092723846436
Test RMSE: 2.8026068210601807, Test MAE: 1.5275242328643799


In [22]:
test_data.x[0]

tensor([1., 0., 0.,  ..., 0., 0., 0.])

In [23]:
test_out[0]

tensor(3.1325)