In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Read the raw ratings table from Google Drive
RATINGS_CSV = '/content/drive/MyDrive/ML Project/grocery_ratings.csv'
data = pd.read_csv(RATINGS_CSV)

# One LabelEncoder per id column, so reviewers and products each get their
# own integer codes (0 .. n_classes-1) and the mappings stay available later.
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

for id_column, encoder in (('reviewerID', user_encoder),
                           ('product_id', item_encoder)):
    data[id_column] = encoder.fit_transform(data[id_column])

# Order rows by reviewer first, then by date within each reviewer
data = data.sort_values(by=['reviewerID', 'date'])

# Create sequences for each user
def create_sequences(data, seq_length=5):
    """Build fixed-length item windows and next-rating targets per reviewer.

    For every reviewer (rows grouped on ``'reviewerID'``), the rows are
    ordered by ``'date'`` and a window of ``seq_length`` consecutive
    ``'product_id'`` values is slid over them. Each window is paired with
    the ``'rating'`` of the row that immediately follows it.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``'reviewerID'``, ``'date'``,
        ``'product_id'`` and ``'rating'``.
    seq_length : int, default 5
        Number of consecutive interactions in each window. Must be >= 1.

    Returns
    -------
    sequences : numpy.ndarray, shape (n_windows, seq_length)
        Item-id windows. Reviewers with ``seq_length`` or fewer rows
        contribute no windows.
    labels : numpy.ndarray, shape (n_windows,)
        Rating of the interaction following each window.

    Raises
    ------
    ValueError
        If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be a positive integer, got {seq_length}")

    sequences = []
    labels = []

    for _, group in data.groupby('reviewerID'):
        # Stable sort: rows sharing the same date keep their incoming order,
        # so re-sorting already-ordered data never shuffles ties.
        group = group.sort_values(by='date', kind='mergesort')
        items = group['product_id'].values
        ratings = group['rating'].values

        for start in range(len(group) - seq_length):
            stop = start + seq_length
            sequences.append(items[start:stop])
            labels.append(ratings[stop])  # rating of the interaction right after the window

    if not sequences:
        # Keep the documented shapes/dtypes even when no reviewer has enough
        # history; np.array([]) would be a 1-D float array instead.
        empty_items = np.asarray(data['product_id'].iloc[:0]).reshape(0, seq_length)
        empty_ratings = np.asarray(data['rating'].iloc[:0])
        return empty_items, empty_ratings

    return np.array(sequences), np.array(labels)

# How many consecutive past interactions make up one model input
SEQ_LENGTH = 5

# Item windows (X) and the rating that follows each window (y)
X, y = create_sequences(data, seq_length=SEQ_LENGTH)

# Hold out 20% of the windows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create a PyTorch Dataset class
class SequenceDataset(Dataset):
    """Map-style dataset pairing each item-id sequence with its target label.

    ``sequences`` and ``labels`` are stored exactly as passed in; they are
    converted to tensors one example at a time when indexed.
    """

    def __init__(self, sequences, labels):
        self.labels = labels
        self.sequences = sequences

    def __len__(self):
        """Number of examples, taken from the sequence container."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return example ``idx`` as ``{'sequence': long tensor, 'label': float tensor}``."""
        item_ids = torch.tensor(self.sequences[idx], dtype=torch.long)
        target = torch.tensor(self.labels[idx], dtype=torch.float)
        return {'sequence': item_ids, 'label': target}

# Wrap the train/test arrays as Datasets
train_dataset = SequenceDataset(sequences=X_train, labels=y_train)
test_dataset = SequenceDataset(sequences=X_test, labels=y_test)

# Batch both splits with the same batch size; only the training loader
# reshuffles its examples.
BATCH_SIZE = 64
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

# Define the model (LSTM-based)
class LSTMRecommender(nn.Module):
    """Embedding -> LSTM -> linear head over a sequence of item ids.

    Parameters
    ----------
    num_items : int
        Size of the embedding table (number of distinct item ids).
    embedding_dim : int
        Width of each item embedding, and the LSTM input size.
    hidden_dim : int
        LSTM hidden size, and the input width of the linear head.
    output_dim : int
        Number of values produced per input sequence.
    """

    def __init__(self, num_items, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=num_items,
                                      embedding_dim=embedding_dim)
        self.lstm = nn.LSTM(input_size=embedding_dim,
                            hidden_size=hidden_dim,
                            batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Map item ids of shape (batch, seq_len) to outputs of shape (batch, output_dim)."""
        embedded = self.embedding(x)
        hidden_states, _state = self.lstm(embedded)
        # Only the hidden state at the final time step feeds the linear head.
        final_step = hidden_states[:, -1, :]
        return self.fc(final_step)

# Initialize model parameters
NUM_ITEMS = len(item_encoder.classes_)  # one embedding row per encoded product id
EMBEDDING_DIM = 32
HIDDEN_DIM = 64
OUTPUT_DIM = 1  # a single predicted rating per sequence

# Seed torch's global RNG before any weights are created so that parameter
# initialisation (and the DataLoader shuffling that draws from the same RNG)
# is repeatable on a fresh kernel. 42 matches the random_state used for the
# train/test split.
torch.manual_seed(42)

model = LSTMRecommender(NUM_ITEMS, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)

# Define loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training loop
def train(model, train_loader, criterion, optimizer, num_epochs=5):
    """Fit ``model`` on the batches of ``train_loader`` for ``num_epochs`` passes.

    Parameters
    ----------
    model : torch.nn.Module
        Called as ``model(batch['sequence'])``; put into training mode here.
    train_loader : iterable
        Yields dict batches with the keys ``'sequence'`` and ``'label'``.
    criterion : callable
        Loss function invoked as ``criterion(predictions, labels)``.
    optimizer : torch.optim.Optimizer
        Optimizer over the model's parameters.
    num_epochs : int, default 5
        Number of full passes over ``train_loader``.

    Returns
    -------
    list of float
        Mean per-batch loss of every epoch, in order (NaN for an epoch in
        which the loader yielded no batches).
    """
    model.train()
    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for batch in train_loader:
            sequences = batch['sequence']
            labels = batch['label']

            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass.
            # Drop only the trailing output axis: a bare squeeze() would also
            # remove the batch axis when a batch holds a single example,
            # leaving a 0-d prediction that broadcasts against the labels.
            outputs = model(sequences)
            loss = criterion(outputs.squeeze(-1), labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Count batches while iterating so an empty loader yields NaN rather
        # than a ZeroDivisionError.
        mean_loss = running_loss / num_batches if num_batches else float('nan')
        epoch_losses.append(mean_loss)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")

    return epoch_losses

# Fit the recommender on the training batches (default number of epochs)
train(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
)

# Evaluation loop
def evaluate(model, test_loader, criterion=None):
    """Print and return the mean per-batch loss of ``model`` on ``test_loader``.

    Parameters
    ----------
    model : torch.nn.Module
        Called as ``model(batch['sequence'])``. It is switched to eval mode
        and left in that mode.
    test_loader : iterable
        Yields dict batches with the keys ``'sequence'`` and ``'label'``.
    criterion : callable, optional
        Loss function invoked as ``criterion(predictions, labels)``.
        Defaults to ``nn.MSELoss()`` so the function no longer depends on a
        module-level ``criterion`` variable.

    Returns
    -------
    float
        Mean of the per-batch losses (NaN if the loader yields no batches).
    """
    if criterion is None:
        criterion = nn.MSELoss()

    model.eval()
    total_loss = 0.0
    num_batches = 0
    with torch.no_grad():
        for batch in test_loader:
            sequences = batch['sequence']
            labels = batch['label']

            # Forward pass.
            # squeeze(-1) keeps the batch axis even for a single-example
            # batch, so predictions and labels always have matching shapes.
            outputs = model(sequences)
            loss = criterion(outputs.squeeze(-1), labels)
            total_loss += loss.item()
            num_batches += 1

    # Guard the division so an empty loader reports NaN instead of raising.
    mean_loss = total_loss / num_batches if num_batches else float('nan')
    print(f"Test Loss: {mean_loss:.4f}")
    return mean_loss

# Report the loss on the held-out test batches
evaluate(model=model, test_loader=test_loader)
