In [7]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import cross_validate

# Load the data
data = pd.read_csv('session_data.csv')

# Exploratory Data Analysis (EDA)
# Display the first few rows of the dataset
print(data.head())

# Check for missing values
print(data.isnull().sum())

# Basic statistics
print(data.describe())

# Frequency of each product
product_freq = data['product_name'].value_counts()
print(product_freq)

# Collaborative Filtering Setup
# Create an implicit rating dataset where if a product is bought in a session, it is considered as a positive interaction
# We will use the number of times a product appears in a session as an implicit feedback score
# For collaborative filtering, we need to map product names and session ids to integer ids

# Create a mapping for session_ids and product_names to unique integer ids
session_id_map = {id: idx for idx,
                  id in enumerate(data['session_id'].unique())}
product_id_map = {name: idx for idx,
                  name in enumerate(data['product_name'].unique())}

# Add these integer ids to the dataframe
data['session_idx'] = data['session_id'].map(session_id_map)
data['product_idx'] = data['product_name'].map(product_id_map)

# Create the implicit feedback dataset required by Surprise
# Since we don't have explicit ratings, we use the count of products bought in a session as implicit feedback
# For simplicity, we assign a feedback value of 1 to each transaction

# Note: In real-world scenarios, feedback can be scaled based on frequency or other business logic
data['feedback'] = 1

# Convert the data to Surprise's format
reader = Reader(rating_scale=(0, 1))
dataset = Dataset.load_from_df(
    data[['session_idx', 'product_idx', 'feedback']], reader)

# Split the dataset into training and test sets
trainset, testset = train_test_split(
    dataset.build_full_trainset().build_testset(), test_size=0.2)

# Initialize the SVD model
model = SVD()

# Train the model
model.fit(trainset)

# Evaluate the model using cross-validation
cv_results = cross_validate(model, dataset, measures=[
                            'RMSE', 'MAE'], cv=5, verbose=True)

# Predict on the test set
predictions = model.test(testset)

# Calculate accuracy metrics
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)

# Display the accuracy metrics
print(f'RMSE: {rmse}')
print(f'MAE: {mae}')

# Generate recommendations for a session


def get_recommendations(session_id, num_recommendations=5):
    session_idx = session_id_map.get(session_id)
    if session_idx is None:
        return "Session ID not found."

    # All products in the catalog
    all_product_idxs = set(product_id_map.values())
    products_bought = set(
        data[data['session_id'] == session_id]['product_idx'])

    # Products not yet bought in the session
    products_not_bought = all_product_idxs - products_bought

    # Predict the 'feedback' score for products not yet bought
    predictions = [model.predict(session_idx, product_idx)
                   for product_idx in products_not_bought]

    # Sort the predictions by estimated feedback score
    recommendations = sorted(predictions, key=lambda x: x.est, reverse=True)

    # Get the top N recommendations
    recommended_product_idxs = [
        pred.iid for pred in recommendations[:num_recommendations]]
    recommended_products = [list(product_id_map.keys())[list(
        product_id_map.values()).index(idx)] for idx in recommended_product_idxs]

    return recommended_products


# Example usage: Get recommendations for a specific session
session_id_example = '000ed966131fcb96e0efc4ff2b716a3e'
recommendations = get_recommendations(session_id_example)
print(f'Recommendations for session {session_id_example}: {recommendations}')

                         session_id          product_name
0  000ed966131fcb96e0efc4ff2b716a3e              beetroot
1  000ed966131fcb96e0efc4ff2b716a3e              cucumber
2  0013eab657eaf2d82d7f1e13023d95c2                 onion
3  0013eab657eaf2d82d7f1e13023d95c2  long shelf life milk
4  0013fabde1e543dd541be925266aadbc                 dates
session_id      0
product_name    0
dtype: int64
                              session_id product_name
count                             345152       345152
unique                            165335          396
top     27d86d946e6ea6d022f12211d61f8ac7        onion
freq                                   9        31196
onion               31196
cucumber            19083
fresh cow milk      18086
beetroot            17077
gourds              15625
                    ...  
shringar                1
suitcase                1
coriander leaves        1
moth dal                1
cola drinks             1
Name: product_name, Length: 396, dtype: int64


AttributeError: 'list' object has no attribute 'n_users'

In [None]:
# Example usage: Get recommendations for a specific session
session_id_example = '000ed966131fcb96e0efc4ff2b716a3e'
recommendations = get_recommendations(session_id_example)
print(f'Recommendations for session {session_id_example}: {recommendations}')