In [1]:
import pandas as pd
from surprise import Dataset, Reader, SVD, KNNBasic
from surprise.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
import joblib

In [2]:
path = r"D:\Online Retail.xlsx"

In [3]:
df = pd.read_excel(path)

In [4]:
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [6]:
df['total_sales']=df['Quantity']*df['UnitPrice']

In [7]:
top_products = df.groupby('Description')['total_sales'].sum().sort_values(ascending=False).head(10)

In [8]:
df_cf = df.groupby(['CustomerID', 'Description'])['Quantity'].sum().reset_index()
reader = Reader(rating_scale=(1, df_cf['Quantity'].max()))
data = Dataset.load_from_df(df_cf[['CustomerID', 'Description', 'Quantity']], reader)
trainset, testset = train_test_split(data, test_size=0.2)

In [9]:
svd_model = SVD()
svd_model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2aaeb4c4f80>

In [10]:
joblib.dump(svd_model, 'svd_collaborative_model.pkl')
joblib.dump(top_products, 'popular_products.pkl')

['popular_products.pkl']

In [11]:
def get_top_selling_products():
    return top_products

In [12]:
def get_personalized_recommendations(user_id):
    all_items = df_cf['Description'].unique()
    purchased = df_cf[df_cf['CustomerID'] == user_id]['Description'].tolist()
    not_purchased = [item for item in all_items if item not in purchased]

    predictions = [svd_model.predict(user_id, item) for item in not_purchased]
    top_recs = sorted(predictions, key=lambda x: x.est, reverse=True)[:5] 
    return [pred.iid for pred in top_recs]

In [13]:
print("Select Recommendation Type:")
print("1. Popular Products")
print("2. Personalized Recommendations")

option = input("Enter 1 or 2: ")

if option == "1":
    print("\nTop Selling Products:")
    for i, (product, sales) in enumerate(top_products.items(), 1):
        print(f"{i}. {product} (Total Sales: £{sales:.2f})")

elif option == "2":
    user_id = input("Enter Customer ID: ")
    user_id = int(user_id)
    print(f"\nTop Recommended Products for User {user_id}:")
    personalized_recs = get_personalized_recommendations(user_id)
    for i, product in enumerate(personalized_recs, 1):
        print(f"• {product}")

Select Recommendation Type:
1. Popular Products
2. Personalized Recommendations

Top Recommended Products for User 17850:
• MEDIUM CERAMIC TOP STORAGE JAR
• 3D DOG PICTURE PLAYING CARDS
• 3D SHEET OF CAT STICKERS
• 3D SHEET OF DOG STICKERS
• 60 TEATIME FAIRY CAKE CASES
