In [None]:
import pandas as pd

# Load the raw ratings table; `data` keeps the CSV contents untouched.
data = pd.read_csv('/content/drive/MyDrive/ML Project/grocery_ratings.csv')

# Derive the working frame `df` from `data` instead of mutating it in place, so
# the cell runs on a fresh kernel and can be re-run without converting the
# already-converted 'date' column a second time.
df = (
    data
    .assign(date=pd.to_datetime(data['date'], unit='s'))  # epoch seconds -> datetime
    # Sort interactions for each user by date
    .sort_values(by=['reviewerID', 'date'])
)

# Create user-item interaction sequences: one row per reviewerID, and every
# other column collapsed into a list that follows the sorted (chronological)
# row order of that user's interactions.
user_sequences = df.groupby('reviewerID').agg(list)

# Resulting structure (index = reviewerID, each cell = list of that user's values).
# NOTE(review): downstream cells read the 'product_id' and 'rating' columns —
# confirm that the CSV provides them under exactly those names.
print(user_sequences.head())


## Data split

In [None]:
def split_sequences(data, train_ratio=0.8):
    """Split every row's interaction sequence into a leading and a trailing part.

    For each row, the first ``int(len(products) * train_ratio)`` entries of the
    'product_id' and 'rating' sequences go to the training lists and the
    remaining entries go to the test lists.

    Args:
        data: Table indexable by column name (e.g. a pandas DataFrame) whose
            'product_id' and 'rating' columns hold one sequence per row.
        train_ratio: Fraction of each sequence assigned to the training part.

    Returns:
        Tuple ``(X_train, X_test, Y_train, Y_test)`` of four lists with one
        entry per input row, in row order. ``X_*`` hold product-ID slices and
        ``Y_*`` hold the matching rating slices.

    Note:
        ``int()`` truncates, so short sequences can produce an empty training
        slice (a one-item history with the default ratio yields ``[]``).
    """
    X_train, X_test, Y_train, Y_test = [], [], [], []

    # Walk the two columns in lockstep rather than using DataFrame.iterrows(),
    # which builds a full Series for every row just to read two cells from it.
    for products, ratings in zip(data['product_id'], data['rating']):
        num_train = int(len(products) * train_ratio)

        # Train on first `num_train` products, predict the rest
        X_train.append(products[:num_train])
        Y_train.append(ratings[:num_train])

        X_test.append(products[num_train:])
        Y_test.append(ratings[num_train:])

    return X_train, X_test, Y_train, Y_test

# Per-user split of the interaction sequences; the 0.8 ratio is spelled out
# here so the train/test proportion is visible at the call site.
X_train, X_test, Y_train, Y_test = split_sequences(user_sequences, train_ratio=0.8)


## Model architecture

In [None]:
import torch
import torch.nn as nn

class RatingPredictionModel(nn.Module):
    """LSTM encoder followed by a linear head applied to the last time step.

    Args:
        input_size: Number of features per time step fed to the LSTM.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of values produced by the linear head.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, seq_len, input_size).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the head's output for the final time step of each sequence.

        Args:
            x: Batched input of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        # The LSTM's final hidden/cell state tuple is not needed here.
        per_step_states, _ = self.lstm(x)
        # Keep only the hidden state emitted at the last position.
        final_step_state = per_step_states[:, -1, :]
        return self.fc(final_step_state)

# Define model parameters

# Per-time-step feature width fed to the LSTM. Look at the first NON-EMPTY
# training sequence: the per-user split truncates with int(), so short
# histories can leave X_train[0] empty and X_train[0][0] would raise IndexError.
first_step = next((seq[0] for seq in X_train if len(seq) > 0), None)
if first_step is None:
    raise ValueError("X_train has no non-empty sequence; cannot infer input_size")

# A bare product ID (string or number) is one scalar feature per step. Calling
# len() on it would give the ID's character count (or raise for numbers), which
# is not a feature width. Only vector-valued steps contribute their length.
if hasattr(first_step, '__len__') and not isinstance(first_step, (str, bytes)):
    input_size = len(first_step)
else:
    input_size = 1
# NOTE(review): the sequences presumably still need to be encoded as float
# tensors of shape (batch, seq_len, input_size) before being passed to the
# model — confirm the encoding step and that input_size matches it.

hidden_size = 128
output_size = 1  # Predicting a single rating

model = RatingPredictionModel(input_size, hidden_size, output_size)


## Loss and evaluation

In [None]:
# Training objective: mean squared error between predicted and true ratings.
criterion = nn.MSELoss(reduction='mean')

# Adam optimiser over every trainable parameter of the model, step size 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# In the training loop, feed the model's predicted ratings and the matching
# Y_train ratings to `criterion` to obtain the MSE loss.


In [None]:
import numpy as np

# After prediction
def evaluate(predictions, targets):
    """Compute RMSE and MAE between predicted and true values.

    Both inputs are converted to float64 NumPy arrays and stripped of
    length-1 axes before they are compared. Without that step, a column of
    predictions shaped (N, 1) subtracted from targets shaped (N,) broadcasts
    to an (N, N) matrix and yields wrong metrics without any error.

    Args:
        predictions: Predicted values as a list, NumPy array, or tensor-like
            object exposing ``detach()``, ``cpu()`` and ``numpy()``.
        targets: True values, in any of the forms accepted for ``predictions``.

    Returns:
        Tuple ``(rmse, mae)`` of Python floats.

    Raises:
        ValueError: Raised by NumPy when the squeezed shapes cannot be
            broadcast against each other.
    """
    def _as_float_array(values):
        # Tensor-like inputs are detached from the autograd graph and moved to
        # host memory first; np.asarray cannot convert them directly when they
        # require gradients.
        if hasattr(values, 'detach'):
            values = values.detach().cpu().numpy()
        # squeeze() drops singleton axes so (N, 1) and (N,) pair element-wise.
        return np.squeeze(np.asarray(values, dtype=np.float64))

    errors = _as_float_array(predictions) - _as_float_array(targets)

    mse = np.mean(errors ** 2)
    rmse = float(np.sqrt(mse))
    mae = float(np.mean(np.abs(errors)))
    return rmse, mae

# Example usage after model prediction
# predictions = model(X_test)
# rmse, mae = evaluate(predictions, Y_test)
