In [14]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset
file_path = "shopping_behavior_updated.csv"
df = pd.read_csv(file_path)

# Select relevant features
features = ['Item Purchased', 'Color', 'Season']

# One-hot encode categorical features
df_encoded = pd.get_dummies(df[features])

# Compute similarity matrix
similarity_matrix = cosine_similarity(df_encoded)

# Convert to DataFrame with appropriate indexing
similarity_df = pd.DataFrame(similarity_matrix, index=df[features].apply(lambda x: f"{x['Item Purchased']}-{x['Color']}-{x['Season']}", axis=1), 
                             columns=df[features].apply(lambda x: f"{x['Item Purchased']}-{x['Color']}-{x['Season']}", axis=1))
similarity_df.to_csv('test_similiarity.csv')

In [22]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset
file_path = "shopping_behavior_updated.csv"
df = pd.read_csv(file_path)

# Select relevant features
features = ['Customer ID', 'Item Purchased', 'Color', 'Season']

# Create unique item identifier
df['Item_ID'] = df.apply(lambda x: f"{x['Item Purchased']}-{x['Color']}-{x['Season']}", axis=1)

# Create User-Item Interaction Matrix
user_item_matrix = df.pivot_table(index='Customer ID', columns='Item_ID', aggfunc='size', fill_value=0)

# Compute Item Similarity
item_similarity = cosine_similarity(user_item_matrix.T)

# Convert to DataFrame
item_similarity_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)

# Function to Recommend Items
def recommend_items(user_id, user_item_matrix, item_similarity_df, top_n=5):
    user_purchases = user_item_matrix.loc[user_id]
    purchased_items = user_purchases[user_purchases > 0].index.tolist()
    
    scores = pd.Series(dtype=float)
    for item in purchased_items:
        similar_items = item_similarity_df[item].drop(purchased_items, errors='ignore')
        scores = scores.add(similar_items, fill_value=0)
    
    return scores.sort_values(ascending=False).head(top_n)

# Example: Get recommendations for a random user
example_user = df['Customer ID'].sample(1).values[0]
recommended_items = recommend_items(example_user, user_item_matrix, item_similarity_df)

recommended_items

Item_ID
Backpack-Beige-Fall    0.0
Shirt-Orange-Summer    0.0
Shirt-Red-Winter       0.0
Shirt-Red-Summer       0.0
Shirt-Red-Spring       0.0
dtype: float64

In [34]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances

# Load the dataset
file_path = "shopping_behavior_updated.csv"
df = pd.read_csv(file_path)

# Select relevant features
features = ['Customer ID', 'Item Purchased', 'Color', 'Season']

# Create unique item identifier
df['Item_ID'] = df.apply(lambda x: f"{x['Item Purchased']}-{x['Color']}-{x['Season']}", axis=1)

# Create User-Item Interaction Matrix
user_item_matrix = df.pivot_table(index='Customer ID', columns='Item_ID', aggfunc='size', fill_value=0)

# Convert to binary NumPy array for Jaccard Similarity
user_item_matrix_np = user_item_matrix.astype(bool).values

# Compute Item Similarity using Jaccard Similarity
item_similarity = 1 - pairwise_distances(user_item_matrix_np.T, metric="jaccard")

# Convert to DataFrame
item_similarity_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)

# Function to Recommend Items
def recommend_items(user_id, user_item_matrix, item_similarity_df, top_n=5):
    user_purchases = user_item_matrix.loc[user_id]
    purchased_items = user_purchases[user_purchases > 0].index.tolist()
    
    scores = pd.Series(dtype=float)
    for item in purchased_items:
        similar_items = item_similarity_df[item].drop(purchased_items, errors='ignore')
        scores = scores.add(similar_items, fill_value=0)
    
    return scores.sort_values(ascending=False).head(top_n)

# Example: Get recommendations for a random user
example_user = df['Customer ID'].sample(1).values[0]
recommended_items = recommend_items(example_user, user_item_matrix, item_similarity_df)

recommended_items


Item_ID
Backpack-Beige-Fall    0.0
Shirt-Orange-Summer    0.0
Shirt-Red-Winter       0.0
Shirt-Red-Summer       0.0
Shirt-Red-Spring       0.0
dtype: float64