<a href="https://colab.research.google.com/github/Hello-rashmi/retail-recommendation/blob/main/retail%20recoomendation%20system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mounting Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Importing libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Loading the Excel file from the mounted Drive path
file_path = '/content/drive/MyDrive/OnlineRetail (1).xlsx'
df = pd.read_excel(file_path)

# Cleaning the data: rows without a customer or a product name cannot be
# placed in the user-item matrix, and only rows with a positive quantity
# are kept
df = df.dropna(subset=["CustomerID", "Description"])
df = df[df["Quantity"] > 0]

# Creating user-item matrix: one row per customer, one column per product,
# each cell holding the summed quantity (0 where there is no purchase)
basket = df.pivot_table(index='CustomerID', columns='Description', values='Quantity', aggfunc='sum', fill_value=0)

# Converting to binary format (1 = purchased).
# A vectorised comparison replaces the element-wise DataFrame.applymap call,
# which is deprecated since pandas 2.1 and runs a Python lambda per cell.
basket_binary = (basket > 0).astype(int)

# Calculating item-item cosine similarity (transpose so that rows are products)
item_similarity = cosine_similarity(basket_binary.T)

# Building a similarity DataFrame labelled by product name on both axes
item_similarity_df = pd.DataFrame(item_similarity, index=basket.columns, columns=basket.columns)

# Defining the recommendation function
def recommend_items(Description, top_n=5):
    """Return the products most similar to the given product.

    Reads the ``Description`` column of the module-level
    ``item_similarity_df`` and ranks every other product by its
    similarity score.

    Args:
        Description: Product name; must match a column label of
            ``item_similarity_df`` exactly.
        top_n: Maximum number of recommendations to return.

    Returns:
        A pandas Series of similarity scores indexed by product name,
        sorted from most to least similar, or an error message string
        when the product is not a column of the similarity matrix.
    """
    if Description not in item_similarity_df.columns:
        return f"❌ '{Description}' not found in dataset."
    scores = item_similarity_df[Description]
    # Remove the product itself by label rather than by position: another
    # product can tie with its self-similarity, so it is not guaranteed to
    # sort first, and slicing off row 0 could drop a real match while
    # keeping the product in its own recommendations.
    others = scores.drop(labels=Description, errors="ignore")
    return others.sort_values(ascending=False).head(top_n)

# Example usage
product_name = "PRODUCT NAME"  # Replace with an exact product name from the Description column
top_n = 5
recommendations = recommend_items(product_name, top_n=top_n)

# Results
if isinstance(recommendations, str):
    # recommend_items returns a message string when the product is unknown;
    # show it on its own instead of under a "Top N" heading
    print(recommendations)
else:
    print(f"\nTop {top_n} product recommendations for: '{product_name}'\n")
    print(recommendations)


# Ten customers with the largest total purchased quantity, biggest first
top_customers = (
    df.groupby("CustomerID")["Quantity"]
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
print("📈 Top 10 Customers by Quantity Purchased:\n")
print(top_customers)



# Ten products with the largest total purchased quantity, biggest first
top_products = (
    df.groupby("Description")["Quantity"]
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
print("\n🛍️ Top 10 Most Purchased Products:\n")
print(top_products)



# Total quantity for every (country, product) pair, flattened back to columns
top_by_country = df.groupby(["Country", "Description"])["Quantity"].sum().reset_index()

# Order countries alphabetically and, within each country, products by
# descending quantity so the best sellers come first
top_by_country = top_by_country.sort_values(by=["Country", "Quantity"], ascending=[True, False])

# The first three rows of each country group are its three best sellers
top3_country = top_by_country.groupby("Country").head(3)
print("\n🌍 Top 3 Products by Country:\n")
print(top3_country)



# Bar chart of total quantity sold per country
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Total quantity sold in each country, largest first
country_sales = (
    df.groupby("Country")["Quantity"]
    .sum()
    .sort_values(ascending=False)
)

# One bar per country on a wide figure
plt.figure(figsize=(15, 6))
sns.barplot(x=country_sales.index, y=country_sales.values, palette="viridis")

# Axis labels and title are set on the axes seaborn just drew into
plt.gca().set_xlabel("Country")
plt.gca().set_ylabel("Total Quantity Sold")
plt.gca().set_title("Total Quantity Sold by Country (Descending Order)")

# Tilt the country names so they do not overlap, then fit the layout
plt.xticks(rotation=75)
plt.tight_layout()

# Render the figure
plt.show()

