# E-commerce Product Recommendation System

This project implements a user-based collaborative filtering recommendation system 
using pandas & scikit-learn. It suggests products to users based on the ratings 
given by similar users.

- Uses cosine similarity
- Simple data example (goodbooks-10k book ratings, used as a stand-in for e-commerce data)
- Technologies: Python, pandas, sklearn

    

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

import warnings
# Install a blanket "ignore" filter: with no category/module argument this
# matches every warning raised after this point.
# NOTE(review): this also hides deprecation and runtime warnings coming from
# pandas / scikit-learn — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Status message confirming the import cell ran to completion.
print("✅ Libraries loaded.")


✅ Libraries loaded.


In [7]:
import pandas as pd

# Source of the ratings: the goodbooks-10k ratings CSV, fetched over HTTP
# (this cell needs network access every time it runs).
url = "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv"

# Read, rename and project in one pass so `df` is assigned exactly once.
# Only "book_id" actually changes (-> "item_id", to mimic an e-commerce
# catalogue); the other two entries map a column name to itself.
df = (
    pd.read_csv(url)
    .rename(columns={"user_id": "user_id", "book_id": "item_id", "rating": "rating"})
    .loc[:, ["user_id", "item_id", "rating"]]
)

print("✅ Data loaded.")
df.head()


✅ Data loaded.


Unnamed: 0,user_id,item_id,rating
0,1,258,5
1,2,4081,4
2,2,260,5
3,2,9296,5
4,2,2318,3


In [10]:
# Step 1 - items: rank items by how many ratings they received and keep the
# 200 most-rated ones.
item_rating_counts = df['item_id'].value_counts()
popular_items = item_rating_counts.head(200).index
is_popular_item = df['item_id'].isin(popular_items)

# Step 2 - users: activity is counted *within the popular-item subset*, not
# over the full dataset, then the 500 most active users are kept.
user_rating_counts = df.loc[is_popular_item, 'user_id'].value_counts()
active_users = user_rating_counts.head(500).index
is_active_user = df['user_id'].isin(active_users)

df_small = df[is_popular_item & is_active_user]

# Step 3 - reshape to a users x items grid; pairs with no rating become NaN.
# pivot_table's default aggregation (mean) applies if a (user, item) pair
# appears more than once.
user_item_matrix = pd.pivot_table(
    df_small,
    values='rating',
    index='user_id',
    columns='item_id',
)

n_users = df_small['user_id'].unique().size
n_items = df_small['item_id'].unique().size
print(f"✅ Filtered dataset: {n_users} users × {n_items} items")
user_item_matrix.head()


✅ Filtered dataset: 500 users × 200 items


item_id,1,2,3,4,5,6,7,8,9,10,...,248,255,264,265,270,271,281,289,292,301
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
615,5.0,3.0,3.0,5.0,3.0,,5.0,,2.0,4.0,...,,,,5.0,,3.0,,4.0,,
1027,3.0,1.0,1.0,,3.0,,,,,,...,5.0,,,5.0,4.0,2.0,,3.0,,
1076,4.0,3.0,1.0,3.0,4.0,3.0,5.0,2.0,3.0,3.0,...,3.0,,,,,,,,,5.0
1126,5.0,4.0,,5.0,3.0,,4.0,,4.0,5.0,...,,,3.0,,5.0,,,,,
1169,4.0,3.0,,5.0,5.0,,,,,,...,4.0,,3.0,4.0,,4.0,,,3.0,3.0


In [12]:
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

# cosine_similarity cannot take NaN, so unrated items are encoded as 0.
# Each row of the filled matrix is one user's rating vector.
ratings_filled = user_item_matrix.fillna(0)

# Pairwise cosine similarity between every pair of user rows.
user_similarity = cosine_similarity(ratings_filled)

# Label both axes with the user ids so similarities can be looked up
# as user_similarity_df.loc[user_a, user_b].
user_ids = user_item_matrix.index
user_similarity_df = pd.DataFrame(user_similarity, index=user_ids, columns=user_ids)

print("✅ User-User similarity matrix created.")
user_similarity_df.head()


✅ User-User similarity matrix created.


user_id,615,1027,1076,1126,1169,1296,1456,1719,1759,1772,...,51460,51577,51673,51838,52036,52049,52051,52518,53026,53292
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
615,1.0,0.415005,0.385756,0.483098,0.517316,0.538976,0.541117,0.480298,0.49117,0.412078,...,0.438563,0.429679,0.457397,0.474308,0.375734,0.535375,0.583637,0.387023,0.626505,0.497711
1027,0.415005,1.0,0.526405,0.367405,0.563696,0.337421,0.522619,0.308415,0.445906,0.497657,...,0.391548,0.251841,0.568865,0.359275,0.47624,0.417803,0.378025,0.538187,0.4567,0.555971
1076,0.385756,0.526405,1.0,0.507365,0.55343,0.479409,0.506222,0.488733,0.503995,0.574749,...,0.494914,0.25147,0.440904,0.505506,0.451154,0.586762,0.500459,0.486713,0.546981,0.529289
1126,0.483098,0.367405,0.507365,1.0,0.451623,0.598462,0.490211,0.619571,0.460918,0.483018,...,0.525296,0.325302,0.394302,0.541143,0.391124,0.550889,0.539157,0.490762,0.442089,0.376477
1169,0.517316,0.563696,0.55343,0.451623,1.0,0.448026,0.60733,0.427824,0.523878,0.584852,...,0.378626,0.275039,0.495538,0.413574,0.388155,0.512832,0.444825,0.505138,0.528185,0.519903


In [13]:
# Example: pick the first user in your dataset
# (the first row label of the user-item matrix)
sample_user = user_item_matrix.index[0]

# Generate recommendations
# NOTE(review): `recommend_items` is not defined in any cell shown in this
# notebook. On a fresh kernel this call raises NameError unless the function
# is defined or imported by an earlier cell — confirm, and restore the
# defining cell so Restart & Run All works.
recommendations = recommend_items(sample_user, user_item_matrix, user_similarity_df, top_n=5)

print(f"✅ Recommended items for user {sample_user}: {recommendations}")


✅ Recommended items for user 615: [25, 27, 21, 42, 23]


In [14]:
# ✅ Print top-5 recommendations for the first five users of the matrix.
# Requires `recommend_items` to already be defined in the kernel.
separator = "-" * 60
first_users = user_item_matrix.index[:5]

for user in first_users:
    recs = recommend_items(user, user_item_matrix, user_similarity_df, top_n=5)
    print(f"Recommendations for user {user}: {recs}")
    print(separator)


Recommendations for user 615: [25, 27, 21, 42, 23]
------------------------------------------------------------
Recommendations for user 1027: [4, 10, 24, 21, 7]
------------------------------------------------------------
Recommendations for user 1076: [11, 43, 33, 22, 76]
------------------------------------------------------------
Recommendations for user 1126: [32, 8, 28, 14, 22]
------------------------------------------------------------
Recommendations for user 1169: [10, 25, 27, 21, 7]
------------------------------------------------------------


In [15]:
import pandas as pd

# Collect one record per user, then build the table in a single
# DataFrame call (one row per user, recommendations kept as a list).
# Requires `recommend_items` to already be defined in the kernel.
recommendations_list = []
first_users = user_item_matrix.index[:5]

for user in first_users:
    recs = recommend_items(user, user_item_matrix, user_similarity_df, top_n=5)
    record = {"User": user, "Recommendations": recs}
    recommendations_list.append(record)

pd.DataFrame(recommendations_list)


Unnamed: 0,User,Recommendations
0,615,"[25, 27, 21, 42, 23]"
1,1027,"[4, 10, 24, 21, 7]"
2,1076,"[11, 43, 33, 22, 76]"
3,1126,"[32, 8, 28, 14, 22]"
4,1169,"[10, 25, 27, 21, 7]"


In [16]:
# Persist both matrices as CSV files. The paths are relative, so the files
# are written to the kernel's current working directory, overwriting any
# existing files with the same names.
exports = {
    "user_similarity_matrix.csv": user_similarity_df,
    "user_item_matrix.csv": user_item_matrix,
}
for csv_name, frame in exports.items():
    frame.to_csv(csv_name)
