                            PRODUCT RECOMMENDATION USING MACHINE LEARNING (COLLABORATIVE FILTERING WITH MATRIX FACTORIZATION)

In [257]:
#IMPORTED LIBRARIES FOR HANDLING AND GENERATING THE FAKE RANDOM DATAFRAME
import pandas as pd
import random
from faker import Faker
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity


In [258]:
#GENERATED THE FAKE DATA USING FAKER LIBRARIES
# Seed both random sources so a fresh kernel rebuilds the same synthetic
# dataset. NOTE: interaction dates are drawn relative to 'today', so the date
# window itself still moves with the day the notebook is run.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

fake = Faker()
num_records = 1000

# 100 customer ids (CUST0001..CUST0100) and 50 product ids (PROD0001..PROD0050).
customers = [f'CUST{i:04d}' for i in range(1, 101)]
products = [f'PROD{i:04d}' for i in range(1, 51)]
interaction_types = ['purchased', 'viewed', 'clicked', 'Added to Cart']

# One fake name per customer, plus a direct id -> name lookup so each record
# does not need a linear `customers.index(...)` scan.
customer_names = [fake.name() for _ in customers]
customer_name_by_id = dict(zip(customers, customer_names))

# Build every record first and create the DataFrame once at the end.
data = []
for _ in range(num_records):
    customer_id = random.choice(customers)
    product_id = random.choice(products)
    interaction_type = random.choice(interaction_types)
    interaction_date = fake.date_between(start_date='-1y', end_date='today')

    data.append({
        'customer_id': customer_id,
        'customer_name': customer_name_by_id[customer_id],
        'product_id': product_id,
        'interaction_type': interaction_type,
        'interaction_date': interaction_date
    })

df = pd.DataFrame(data)

In [259]:
#SAVE THE FAKE DATA IN CSV
# Persist the generated interactions to the working directory; the row index
# is left out of the file.
df.to_csv(path_or_buf="PRODUCT DATASET.csv", index=False)

In [260]:
#READ THE CSV FILE
# Load the same relative path that the save cell wrote ("PRODUCT DATASET.csv")
# so the notebook round-trips its own output instead of depending on a
# machine-specific absolute directory.
# After the round trip, interaction_date is read back as plain text.
df = pd.read_csv("PRODUCT DATASET.csv")


In [None]:
#DISPLAY THE DATAFRAME
# Preview the first rows of the frame that was just read back from the CSV.
df.head()

                                          DATA CLEANING

In [262]:
#MAPPING THE SCORE TO THE DATAFRAME BASED ON INTERACTION TYPE
# Stronger signals get a larger weight: a purchase outranks an add-to-cart,
# which outranks a view, which outranks a click.
interaction_mapping = {
    'purchased': 4,
    'Added to Cart': 3,
    'viewed': 2,
    'clicked': 1,
}
# Any interaction type missing from the mapping becomes NaN in the new column.
df['interaction_score'] = df['interaction_type'].map(interaction_mapping)

In [None]:
#CHECKING THE NULL VALUES
# Per-column count of missing values. A non-zero count in interaction_score
# would mean an interaction_type was not covered by interaction_mapping.
df.isna().sum()

In [None]:
#CHECKING THE DUPLICATED VALUE
# Number of rows that repeat an earlier row in every column.
df.duplicated().sum()

In [None]:
#DISPLAY THE DATAFRAME AFTER MAPPING
# Preview again to confirm the interaction_score column is present.
df.head()

                                             EXPLORATORY DATA ANALYSIS

In [266]:
#IMPORTED LIBRARIES FOR EDA AND OUTLIER CHECKS
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
#CHECKING THE OUTLIER
# The whole frame is handed to seaborn in wide form.
# NOTE(review): presumably only the numeric column (interaction_score) ends up
# being drawn, since the other columns hold text -- confirm against the
# installed seaborn version.
plt.figure(figsize=(6,6))
sns.boxplot(data=df)

In [None]:
#COUNT OF INTERACTION TYPE
# Bar per interaction type showing how many records fall in each category.
sns.set(style="whitegrid")
plt.figure(figsize=(8, 5))
ax = sns.countplot(data=df, x='interaction_type', palette='viridis')
ax.set_title('Distribution of Interaction Types')
ax.set_ylabel('Count')
plt.show()

In [None]:
# INTERACTION SCORE DISTRIBUTION
# Histogram (with a KDE overlay) of the numeric score assigned to each record.
plt.figure(figsize=(8, 5))
ax = sns.histplot(df['interaction_score'], bins=30, kde=True, color='blue')
ax.set(
    title='Distribution of Interaction Scores',
    xlabel='Interaction Score',
    ylabel='Frequency',
)
plt.show()

In [None]:
#TOP 10 MOST INTERACTED PRODUCTS
# value_counts() is already sorted from most to least frequent, so the first
# ten entries are the ten most-interacted products.
plt.figure(figsize=(10, 6))
top_products = df['product_id'].value_counts().iloc[:10]
ax = sns.barplot(x=top_products.index, y=top_products.values, palette='Blues_d')
ax.set(
    title='Top 10 Most Interacted Products',
    xlabel='Product ID',
    ylabel='Interaction Count',
)
plt.xticks(rotation=45)
plt.show()


In [None]:
#TOP 10 MOST INTERACTED CUSTOMERS
# value_counts() is already sorted from most to least frequent, so the first
# ten entries are the ten most active customers.
plt.figure(figsize=(10, 6))
top_customers = df['customer_id'].value_counts().iloc[:10]
ax = sns.barplot(x=top_customers.index, y=top_customers.values, palette='Blues_d')
ax.set(
    title='Top 10 Most Interacted Customers',
    xlabel='Customer ID',
    ylabel='Interaction Count',
)
plt.xticks(rotation=45)
plt.show()


                                            DATA PROCESSING AND MODEL TRAINING

In [272]:
#CREATING THE PIVOT TABLE FOR TRAINING
# Rows are customers, columns are products; each cell is the summed
# interaction score for that pair, with 0 where the pair never occurs.
interaction_matrix = df.pivot_table(
    values='interaction_score',
    index='customer_id',
    columns='product_id',
    aggfunc='sum',
    fill_value=0,
)

In [None]:
#DISPLAY THE PIVOT TABLE
# Show the customer x product score matrix built by the pivot.
interaction_matrix

In [274]:
#REPLACE THE VALUE 0 WITH 0.01
# Overwrites every 0 ("no interaction") cell of the pivot with 0.01.
# NOTE(review): the mapped scores are all >= 1, so after this step every cell
# is strictly positive. A plain `> 0` test on this matrix can no longer tell
# seen products from unseen ones, and the CSR conversion that follows has no
# zeros left to skip. Confirm the placeholder is really wanted.
interaction_matrix = interaction_matrix.replace(0, 0.01) 

In [275]:
#IMPORT CSR MATRIX TO HANDLE THE SPARSE DATA
from scipy.sparse import csr_matrix

In [276]:
#HANDLING THE SPARSE DATA
# Wrap the pivot table in SciPy's compressed-sparse-row format; this is the
# matrix that TruncatedSVD is fitted on.
interaction_matrix_sparse = csr_matrix(interaction_matrix)

In [277]:
#TRAINING THE MODEL USING SVD
# TruncatedSVD (default randomized solver) refuses n_components larger than
# the number of feature columns, so clamp the requested 50 to the number of
# product columns actually present in the matrix.
# NOTE(review): when all 50 products are present this keeps every component,
# i.e. no dimensionality reduction takes place -- consider a smaller rank.
n_components = min(50, interaction_matrix_sparse.shape[1])
svd = TruncatedSVD(n_components=n_components, random_state=42)
# One row of latent factors per customer.
svd_matrix = svd.fit_transform(interaction_matrix_sparse)

In [278]:
#USED COSINE WHICH IS COLLABERATIVE FILTERING
# Pairwise cosine similarity between the customers' latent-factor rows,
# labelled on both axes with the customer ids from the interaction matrix.
svd_similarity = cosine_similarity(svd_matrix)
svd_similarity_df = pd.DataFrame(
    data=svd_similarity,
    index=interaction_matrix.index,
    columns=interaction_matrix.index,
)

In [None]:
#DISPLAY THE DATAFRAME 
# Show the customer x customer cosine-similarity table.
svd_similarity_df

In [280]:
# FUNCTION TO RECOMMEND PRODUCTS USING SVD-BASED SIMILARITY FOR A GIVEN USER
def recommend_products_svd(user_id, num_recommendations=5, interaction_threshold=0.01):
    """Recommend products the user has not interacted with yet.

    Every other customer's interactions are weighted by that customer's
    SVD-based cosine similarity to ``user_id``; products are ranked by the
    weighted total and the ones the user already interacted with are removed.

    Parameters
    ----------
    user_id
        Customer label; must be present in ``interaction_matrix.index``.
    num_recommendations : int, default 5
        Maximum number of product ids to return.
    interaction_threshold : float, default 0.01
        Scores at or below this value are treated as "no interaction". The
        default equals the 0.01 placeholder that the notebook writes over the
        empty cells of ``interaction_matrix``; testing ``> 0`` instead would
        mark every product as already seen and never recommend anything.

    Returns
    -------
    list or str
        Up to ``num_recommendations`` product ids, best first. A message
        string is returned instead when the user is unknown or when no unseen
        product is left.
    """
    if user_id not in interaction_matrix.index:
        return f"User {user_id} not found in dataset."

    # Remove the user by label (position 0 after sorting is not guaranteed to
    # be the user when similarities tie) and ignore negative similarities.
    neighbour_weights = svd_similarity_df[user_id].drop(labels=user_id).clip(lower=0)

    # Zero out placeholder cells so only genuine interactions contribute.
    neighbour_interactions = interaction_matrix.loc[neighbour_weights.index]
    neighbour_interactions = neighbour_interactions.where(
        neighbour_interactions > interaction_threshold, 0
    )

    # Similarity-weighted total per product, best first.
    product_scores = (
        neighbour_interactions.mul(neighbour_weights, axis=0)
        .sum()
        .sort_values(ascending=False)
    )

    user_row = interaction_matrix.loc[user_id]
    seen_products = set(user_row[user_row > interaction_threshold].index)

    recommendations = [
        prod for prod in product_scores.index if prod not in seen_products
    ][:num_recommendations]

    return recommendations if recommendations else "No new recommendations available."

In [None]:
#IMPORTED JOBLIB AND SAVED THE MODEL USING JOBLIB
# Persists three artefacts: the fitted SVD model, the customer x product
# interaction matrix, and the customer x customer similarity table.
# NOTE(review): the targets are absolute Windows paths; this cell fails on any
# machine where d:\BANK PROJECT does not exist.
import joblib

joblib.dump(svd,r'd:\BANK PROJECT\svd.pkl')

joblib.dump(interaction_matrix, r'd:\BANK PROJECT\interaction mat.pkl')

joblib.dump(svd_similarity_df, r'd:\BANK PROJECT\svd sim.pkl')


In [None]:
# RECOMMEND PRODUCTS FOR A GIVEN USER BASED ON SVD SIMILARITY
# Example run for one customer; the result is either a list of product ids or
# a message string from the recommender.
user_id = "CUST0003"
recommended_products_svd = recommend_products_svd(user_id=user_id)
print(f"Recommended products for User {user_id} with SVD: {recommended_products_svd}")

In [284]:
# Function to calculate precision at K
def precision_at_k(actual, predicted, k=5):
    """Fraction of the top-``k`` slots filled by distinct relevant items.

    Parameters
    ----------
    actual : iterable of hashable
        Items that are truly relevant.
    predicted : sequence of hashable
        Ranked predictions, best first.
    k : int, default 5
        Cut-off. The divisor is always ``k``, even when fewer than ``k``
        predictions are supplied.

    Returns
    -------
    float
        ``hits / k``; ``0.0`` when ``k`` is not positive (no slots inspected)
        instead of raising ``ZeroDivisionError``.
    """
    if k <= 0:
        return 0.0
    # Sets give O(1) membership tests and stop a repeated prediction from
    # being counted as several hits.
    hits = len(set(predicted[:k]) & set(actual))
    return hits / k

def recall_at_k(actual, predicted, k=5):
    """Fraction of the relevant items that appear in the top-``k`` predictions.

    Parameters
    ----------
    actual : iterable of hashable
        Items that are truly relevant.
    predicted : sequence of hashable
        Ranked predictions, best first.
    k : int, default 5
        Cut-off applied to ``predicted``.

    Returns
    -------
    float
        A value in ``[0, 1]``; ``0.0`` when ``actual`` is empty or ``k`` is
        not positive.
    """
    if not actual or k <= 0:
        return 0.0
    # De-duplicate both sides so repeated predictions cannot push recall
    # above 1 and repeated relevant items do not inflate the denominator.
    relevant = set(actual)
    hits = len(set(predicted[:k]) & relevant)
    return hits / len(relevant)

# Toy example: two of the three predictions are relevant, and two of the
# three relevant items are retrieved.
actual = ['PROD001', 'PROD002', 'PROD003']
predicted = ['PROD002', 'PROD003', 'PROD004']

# Evaluate both metrics at the same cut-off.
precision, recall = (
    metric(actual, predicted, k=3) for metric in (precision_at_k, recall_at_k)
)

# Report to one decimal place.
precision_rounded, recall_rounded = round(precision, 1), round(recall, 1)

print(f"Precision@K: {precision_rounded}")
print(f"Recall@K: {recall_rounded}")

Precision@K: 0.7
Recall@K: 0.7
