# 📦 Amazon Recommendation System
This notebook demonstrates loading data, exploring it, and building popularity-based and collaborative filtering models.

In [None]:
# 📚 Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import TruncatedSVD
from surprise import KNNWithMeans, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split as surprise_train_test_split
import warnings; warnings.simplefilter('ignore')

%matplotlib inline

## 📂 Load Dataset & Add Headers

In [None]:
# Column names are passed explicitly through `names`, so pandas treats every
# row of the file as data rather than reading a header from it.
ratings_csv_path = "/kaggle/input/amazon-product-reviews/ratings_Electronics (1).csv"
rating_columns = ['userId', 'productId', 'Rating', 'timestamp']
electronics_data = pd.read_csv(ratings_csv_path, names=rating_columns)

# Preview the first rows as the cell's displayed output.
electronics_data.head()

## 📏 Dataset Overview

In [None]:
# Dimensions, per-column dtypes and the memory/non-null report for the frame.
frame_shape = electronics_data.shape
print("Shape of data:", frame_shape)
print("\nData Types:\n", electronics_data.dtypes)
electronics_data.info()

# Descriptive statistics for the Rating column, followed by its extremes.
rating_summary = electronics_data.describe()['Rating'].T
print("\nRating Summary:\n", rating_summary)
rating_column = electronics_data['Rating']
print('Minimum rating:', rating_column.min())
print('Maximum rating:', rating_column.max())

## 🧹 Checking for Missing Values

In [None]:
# Count the null entries in each column and report them.
missing_per_column = electronics_data.isna().sum()
print('\nMissing values per column:\n', missing_per_column)

## 📊 Rating Distribution

In [None]:
# Bar chart of how many rows carry each distinct Rating value.
rating_axes = sns.countplot(data=electronics_data, x="Rating")
rating_axes.set_title("Distribution of Ratings")
plt.show()

## 👤 Unique Users & Products

In [None]:
# Headline counts: rows in the frame, then distinct users and distinct products.
total_ratings = len(electronics_data)
distinct_users = electronics_data.userId.nunique()
distinct_products = electronics_data.productId.nunique()

print("\nTotal no of ratings :", total_ratings)
print("Total no of Users   :", distinct_users)
print("Total no of products:", distinct_products)

## 🕒 Drop Timestamp Column

In [None]:
# Remove the timestamp column; none of the later cells shown here read it.
# Reassigning with errors='ignore' (instead of inplace=True) keeps this cell
# idempotent: running it a second time is a no-op rather than a KeyError
# because the column is already gone.
electronics_data = electronics_data.drop(columns=['timestamp'], errors='ignore')

## 📈 Analyze Ratings per User

In [None]:
# Number of ratings submitted by each user, most active users first.
no_of_rated_products_per_user = (
    electronics_data.groupby('userId')['Rating']
    .count()
    .sort_values(ascending=False)
)
print("\nTop users by no. of ratings:\n", no_of_rated_products_per_user.head())
print("\nDescribe:\n", no_of_rated_products_per_user.describe())

# Percentile curve (0%, 1%, ..., 100%) of the per-user rating counts.
# interpolation='higher' makes every plotted value an actually observed count.
quantiles = no_of_rated_products_per_user.quantile(np.arange(0,1.01,0.01), interpolation='higher')
plt.figure(figsize=(10,6))
plt.title("Quantiles of No. of Ratings per User")
quantiles.plot()
plt.ylabel('No of ratings')
plt.xlabel('Quantile')
plt.show()

# The comparison is inclusive (>= 50), so the label says "at least 50"; the
# previous "more than 50" wording did not match the count being printed.
# Series.sum() is vectorised, whereas the builtin sum() walks the Series
# element by element in Python.
users_with_min_ratings = (no_of_rated_products_per_user >= 50).sum()
print('\nNumber of users with at least 50 ratings:', users_with_min_ratings)

## ⭐ Popularity-Based Recommendation

In [None]:
# Keep only the ratings of products that have at least this many ratings.
MIN_RATINGS_PER_PRODUCT = 50

# transform('count') broadcasts each product's non-null rating count back onto
# its rows, giving a vectorised boolean mask. It selects the same rows, in the
# same order, as groupby().filter(lambda ...), but without invoking a Python
# callback once per product.
ratings_count_for_row = electronics_data.groupby('productId')['Rating'].transform('count')
new_df = electronics_data[ratings_count_for_row >= MIN_RATINGS_PER_PRODUCT]

# Rating counts per retained product, most-rated first, drawn as a curve.
no_of_ratings_per_product = new_df.groupby('productId')['Rating'].count().sort_values(ascending=False)
plt.figure(figsize=(12,5))
plt.plot(no_of_ratings_per_product.values)
plt.title('# Ratings per Product')
plt.xlabel('Products')
plt.ylabel('No of Ratings')
plt.show()

print("\nAverage rating per product:\n", new_df.groupby('productId')['Rating'].mean().head())

# Popularity here is the number of ratings a product received; the 'Rating'
# column of popular_products therefore holds counts, not scores.
popular_products = pd.DataFrame(new_df.groupby('productId')['Rating'].count())
most_popular = popular_products.sort_values('Rating', ascending=False)
most_popular.head(30).plot(kind="bar", figsize=(12,5), title="Top 30 Most Rated Products")
plt.show()

## 🔍 Collaborative Filtering (Item-Item)

In [None]:
# Wrap the filtered ratings in a Surprise dataset declared on a 1-5 scale,
# then hold out 30% of it for testing with a fixed seed.
reader = Reader(rating_scale=(1, 5))
rating_triples = new_df[['userId','productId','Rating']]
data = Dataset.load_from_df(rating_triples, reader)
trainset, testset = surprise_train_test_split(data, test_size=0.3, random_state=10)

# Item-based neighbourhood model: user_based=False makes similarities be
# computed between products, using the pearson_baseline measure and k=5.
item_sim_options = {'name': 'pearson_baseline', 'user_based': False}
algo = KNNWithMeans(k=5, sim_options=item_sim_options)
algo.fit(trainset)

# Score the held-out ratings; accuracy.rmse prints the RMSE (verbose=True)
# and its return value is shown as the cell output.
test_pred = algo.test(testset)
print("\nItem-based CF Model: Test Set RMSE")
accuracy.rmse(test_pred, verbose=True)

## 🧠 Model-Based Collaborative Filtering (SVD)

In [None]:
# Build a user x product utility matrix from the first 10,000 filtered
# ratings; user/product pairs with no rating are filled with 0.
new_df1 = new_df.head(10000)
ratings_matrix = new_df1.pivot_table(values='Rating', index='userId', columns='productId', fill_value=0)
print("\nUtility matrix shape:", ratings_matrix.shape)

# Transpose so each row is a product, then reduce every product to 10 latent
# components. TruncatedSVD's default solver is randomized, so random_state is
# pinned to make the recommendations reproducible across runs.
X = ratings_matrix.T
SVD = TruncatedSVD(n_components=10, random_state=10)
decomposed_matrix = SVD.fit_transform(X)

# Pairwise correlation between the products' latent-component rows.
correlation_matrix = np.corrcoef(decomposed_matrix)

product_id = 'B00000K135'
# Fail with an explicit message when the query product is not in the sample,
# instead of the opaque "... is not in list" raised by list.index().
if product_id not in X.index:
    raise ValueError(
        "Product {!r} is not present in the sampled utility matrix".format(product_id)
    )
product_idx = X.index.get_loc(product_id)
corr_product = correlation_matrix[product_idx]

# Rank the other products by their correlation with the query product so that
# the "top" slice below really contains the strongest matches. Previously the
# list was in index order, making [:24] an arbitrary subset of the matches.
# NaN correlations compare False against the threshold and are dropped.
similarity_to_product = pd.Series(corr_product, index=X.index).drop(product_id)
recommend = (
    similarity_to_product[similarity_to_product > 0.65]
    .sort_values(ascending=False)
    .index.tolist()
)

print("\nTop Recommended Products:\n", recommend[:24])

## ✅ Done!

In [None]:
# Final marker so a full run visibly reaches the end of the notebook.
completion_message = "\nProject finished! 🎉"
print(completion_message)