In [2]:
import pandas as pd

# Load both CSV files
customers = pd.read_csv("customer_data_collection.csv")
products = pd.read_csv("product_recommendation_data.csv")

# Remove extra 'Unnamed' columns (auto-generated from CSV exports)
customers = customers.loc[:, ~customers.columns.str.contains('^Unnamed')]
products = products.loc[:, ~products.columns.str.contains('^Unnamed')]

# Show first few rows
print("Customers Data:\n", customers.head(), "\n")
print("Products Data:\n", products.head())


Customers Data:
   Customer_ID  Age  Gender   Location                 Browsing_History  \
0       C1000   28  Female    Chennai             ['Books', 'Fashion']   
1       C1001   27    Male      Delhi  ['Books', 'Fitness', 'Fashion']   
2       C1002   34   Other    Chennai                  ['Electronics']   
3       C1003   23    Male  Bangalore                   ['Home Decor']   
4       C1004   24   Other    Kolkata        ['Fashion', 'Home Decor']   

                               Purchase_History    Customer_Segment  \
0                        ['Biography', 'Jeans']         New Visitor   
1  ['Biography', 'Resistance Bands', 'T-shirt']  Occasional Shopper   
2                                ['Smartphone']  Occasional Shopper   
3                                  ['Wall Art']      Frequent Buyer   
4                             ['Shoes', 'Lamp']      Frequent Buyer   

   Avg_Order_Value Holiday  Season  
0          4806.99      No  Winter  
1           795.03     Yes  Autumn  


In [3]:
# Convert to lowercase and strip spaces
customers['Purchase_History'] = customers['Purchase_History'].astype(str).str.lower().str.strip()
products['Subcategory'] = products['Subcategory'].astype(str).str.lower().str.strip()

# Check if cleaned
print(customers[['Customer_ID', 'Purchase_History']].head())
print(products[['Product_ID', 'Subcategory']].head())


  Customer_ID                              Purchase_History
0       C1000                        ['biography', 'jeans']
1       C1001  ['biography', 'resistance bands', 't-shirt']
2       C1002                                ['smartphone']
3       C1003                                  ['wall art']
4       C1004                             ['shoes', 'lamp']
  Product_ID Subcategory
0      P2000       jeans
1      P2001    lipstick
2      P2002      laptop
3      P2003      comics
4      P2004    cushions


In [4]:
# Function to recommend products based on Purchase History
def recommend_products(purchase_history):
    matched_products = products[products['Subcategory'].isin(purchase_history.split(','))]
    return matched_products[['Product_ID', 'Category', 'Subcategory', 'Probability_of_Recommendation']]

# Apply this function to each customer
customers['Recommended_Products'] = customers['Purchase_History'].apply(recommend_products)

# Show recommendations for first 5 customers
for index, row in customers.head(5).iterrows():
    print(f"\nCustomer ID: {row['Customer_ID']}")
    print(f"Purchase History: {row['Purchase_History']}")
    print("Recommended Products:")
    print(row['Recommended_Products'])



Customer ID: C1000
Purchase History: ['biography', 'jeans']
Recommended Products:
Empty DataFrame
Columns: [Product_ID, Category, Subcategory, Probability_of_Recommendation]
Index: []

Customer ID: C1001
Purchase History: ['biography', 'resistance bands', 't-shirt']
Recommended Products:
Empty DataFrame
Columns: [Product_ID, Category, Subcategory, Probability_of_Recommendation]
Index: []

Customer ID: C1002
Purchase History: ['smartphone']
Recommended Products:
Empty DataFrame
Columns: [Product_ID, Category, Subcategory, Probability_of_Recommendation]
Index: []

Customer ID: C1003
Purchase History: ['wall art']
Recommended Products:
Empty DataFrame
Columns: [Product_ID, Category, Subcategory, Probability_of_Recommendation]
Index: []

Customer ID: C1004
Purchase History: ['shoes', 'lamp']
Recommended Products:
Empty DataFrame
Columns: [Product_ID, Category, Subcategory, Probability_of_Recommendation]
Index: []
