In [51]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load the raw order export (point this at your own dataset).
data = pd.read_csv('Orders_New_query_2024_07_01.csv')

# Normalise state names so that spelling variants differing only in case or
# surrounding whitespace collapse into a single area.
data['billing_address_state'] = (
    data['billing_address_state'].str.strip().str.upper()
)

# Area x item matrix: one row per state, one column per SKU, each cell the
# total ordered quantity; state/SKU pairs with no orders become 0.
area_item_matrix = pd.pivot_table(
    data,
    index='billing_address_state',
    columns='sku_id',
    values='ordered_quantity',
    aggfunc='sum',
).fillna(0)

# Item x item cosine similarity, computed over each SKU's per-state quantity
# vector (hence the transpose) and labelled with SKU ids on both axes.
item_similarity_matrix = pd.DataFrame(
    cosine_similarity(area_item_matrix.transpose()),
    index=area_item_matrix.columns,
    columns=area_item_matrix.columns,
)

# Function to recommend items based on area and calculate similarity
def recommend_items_with_similarity(area, item, area_item_matrix, item_similarity_matrix, n_recommendations=5):
    """Recommend an area's best-selling items, scored against a reference item.

    Args:
        area: Area name. It is stripped and upper-cased before lookup, so the
            index of ``area_item_matrix`` is expected to use that form.
        item: Label of the reference item (a column of ``area_item_matrix``).
        area_item_matrix: DataFrame indexed by area with one column per item;
            each cell is the quantity ordered (0 when nothing was ordered).
        item_similarity_matrix: Square DataFrame of item-to-item similarity
            scores, labelled by item on both axes.
        n_recommendations: Maximum number of items to return.

    Returns:
        A DataFrame with the columns ``Recommended Item``, ``Ordered Quantity``
        and ``Similarity Score``, sorted by similarity (highest first). It
        holds the area's top sellers other than ``item`` and may contain fewer
        than ``n_recommendations`` rows, or none, when the area has few items
        with sales.

        A message string is returned instead when the area is unknown, or
        when ``item`` has no recorded quantity in that area.
    """
    # Normalise the query so it matches the stripped, upper-cased area labels.
    area = area.strip().upper()

    if area not in area_item_matrix.index:
        return f"Area '{area}' not found in the dataset."

    # Every item's quantity in this area, most popular first.
    items_in_area = area_item_matrix.loc[area].sort_values(ascending=False)

    # `in` on a Series tests the index, i.e. the item labels.
    if item not in items_in_area or items_in_area.loc[item] == 0:
        return f"The item '{item}' has not been bought in area '{area}'. Popular items:\n{items_in_area.head(n_recommendations)}"

    # Candidates are the *other* items that actually sold here. Without the
    # positive-quantity filter, an area with fewer than n_recommendations
    # sellers would be padded with items that were never ordered in it.
    other_items = items_in_area.drop(item)
    recommendations = other_items[other_items > 0].head(n_recommendations)

    # Similarity of each candidate to the reference item, in candidate order.
    similarity_scores = item_similarity_matrix.loc[item, recommendations.index]

    recommendations_with_similarity = pd.DataFrame({
        'Recommended Item': recommendations.index,
        'Ordered Quantity': recommendations.values,
        'Similarity Score': similarity_scores.values,
    }).sort_values(by='Similarity Score', ascending=False)

    return recommendations_with_similarity

# Demo run: top sellers of one state, scored against a single reference SKU.
area = "TAMIL NADU"  # state to query; change as needed
input_item = 'amazon_B0CTMQ4YY7'  # reference SKU id supplied by the user
recommendations_with_similarity = recommend_items_with_similarity(
    area,
    input_item,
    area_item_matrix,
    item_similarity_matrix,
    n_recommendations=20,
)

# Show the outcome under a header naming the area and the excluded SKU.
print(
    f"Recommended items for area '{area}' excluding item '{input_item}':\n",
    recommendations_with_similarity,
)



Recommended items for area 'TAMIL NADU' excluding item 'amazon_B0CTMQ4YY7':
                         Recommended Item  Ordered Quantity  Similarity Score
2                      amazon_B0CTMQZ659              36.0          0.988317
3                      amazon_B0CTMQF324              36.0          0.985451
0                      amazon_B0CTMQPVJ2              67.0          0.983242
6                      amazon_B0CTQBR3S3              26.0          0.977465
12                     amazon_B0CTQ1NHQQ              18.0          0.968687
18  shopify_9315201286433_48922095124769              14.0          0.964617
7                      amazon_B0CTQH1B7H              23.0          0.947378
1   shopify_9315443441953_48922373062945              45.0          0.946729
9   shopify_9315034366241_48921914212641              22.0          0.945033
5                      amazon_B0CQK7KQPW              27.0          0.939178
4                      amazon_B0CQK7NG2H              28.0          0.935044