In [10]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Load the dataset (excluding Description and InvoiceDate columns)
df = pd.read_csv('Dataset.csv', usecols=['CustomerID', 'StockCode', 'Quantity'])

# Preprocess the data (drop NaN rows and convert to integer)
df.dropna(subset=['CustomerID', 'StockCode', 'Quantity'], inplace=True)
# drop StockCode with non-numeric values
df = df[df['StockCode'].str.isnumeric()]

df = df.astype({'CustomerID': int, 'StockCode': int, 'Quantity': int})

# Create a Surprise Dataset
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['CustomerID', 'StockCode', 'Quantity']], reader)

# Split the data into train and test sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Build and train the SVD model
model = SVD(n_factors=50, random_state=42)
model.fit(trainset)

# Make predictions
predictions = model.test(testset)

# Evaluate the model
accuracy.rmse(predictions)

RMSE: 46.2750


46.27497570067783

In [3]:
from surprise.accuracy import mae
from surprise.model_selection import cross_validate
from sklearn.metrics import precision_score, recall_score, f1_score

# Calculate MAE using Surprise's built-in function
mae_score = mae(predictions)

# Convert predictions to binary recommendations (1 for predicted ratings >= 3, 0 otherwise)
binary_predictions = [1 if prediction.est >= 3 else 0 for prediction in predictions]
binary_actuals = [1 if prediction.r_ui >= 3 else 0 for prediction in predictions]

# Calculate Precision, Recall, and F1-Score using scikit-learn
precision = precision_score(binary_actuals, binary_predictions)
recall = recall_score(binary_actuals, binary_predictions)
f1 = f1_score(binary_actuals, binary_predictions)

print("MAE:", mae_score)
print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)


MAE:  10.7361
MAE: 10.736078917578567
Precision: 0.6581585898334321
Recall: 1.0
F1-Score: 0.79384275288113


In [4]:
# Function to get item recommendations for a given user
def get_item_recommendations(user_id, num_recommendations=5):
    items_already_purchased = df[df['CustomerID'] == user_id]['StockCode'].tolist()
    
    # Get all item IDs
    item_ids = df['StockCode'].unique()
    
    # Remove items already purchased
    items_to_predict = [item_id for item_id in item_ids if item_id not in items_already_purchased]
    
    # Predict ratings for items
    predictions = [model.predict(user_id, item_id) for item_id in items_to_predict]
    
    # Sort predictions by predicted rating
    predictions.sort(key=lambda x: x.est, reverse=True)
    
    # Get top N recommended items
    top_recommendations = [prediction.iid for prediction in predictions[:num_recommendations]]
    return top_recommendations

# Get item recommendations for a specific user
user_id = 17850  # Replace with a valid CustomerID
recommendations = get_item_recommendations(user_id)
print("Item recommendations for User ID:", user_id)
print(recommendations)

Item recommendations for User ID: 17850
[84879, 22745, 22748, 22749, 22310]


ValueError: not enough values to unpack (expected 4, got 3)

In [12]:
# get list of unique users
users = df['CustomerID'].unique()
users

array([17850, 13047, 12583, ..., 13298, 14569, 12713])

In [16]:
test_len = 0.8*(len(users))
test_customers = users[:int(test_len)]
train_customers = users[int(test_len):]
