# Segmenting Users Dynamically - Recommender Systems

In [6]:
import pandas as pd
# Read the interaction records from disk; later cells refer to this frame as df.
df = pd.read_csv(filepath_or_buffer='CustomData.csv')
# Leave the first rows as the cell's displayed value.
df.head()

Similarity Matrix:
[[1.         0.66666667 0.66666667 0.66666667 0.66666667]
 [0.66666667 1.         0.33333333 0.66666667 0.66666667]
 [0.66666667 0.33333333 1.         0.33333333 0.66666667]
 [0.66666667 0.66666667 0.33333333 1.         0.33333333]
 [0.66666667 0.66666667 0.66666667 0.33333333 1.        ]]


In [7]:
# Count the rows recorded for every (ProductTag, UserID) pair, so that users
# sharing a product tag are listed next to each other.
pair_sizes = df.groupby(by=['ProductTag', 'UserID']).size()
grouped = pair_sizes.reset_index(name='Count')
# Show the per-pair counts
print(grouped)

  super()._check_params_vs_input(X, default_n_init=10)


User Labels: [0 0 1 0 1]


In [8]:
from gensim.models import FastText

# One entry per row of the ProductTag column, in row order.
product_tags = df['ProductTag'].to_list()
# Fit FastText embeddings, handing the whole tag list over as a single
# training sentence.
training_sentences = [product_tags]
model = FastText(
    sentences=training_sentences,
    vector_size=100,
    window=5,
    min_count=1,
    sg=1,
)

# Function to find similar product tags based on FastText embeddings
def find_similar_tags(tags, *, topn=5):
    """Build one neighbour group per distinct tag using the module-level ``model``.

    Each group is the tag itself followed by the keys that
    ``model.wv.most_similar`` returns for it.

    Args:
        tags: Iterable of hashable tags. Repeated tags are allowed; each
            distinct tag is looked up only once.
        topn: Number of neighbours requested per tag (keyword-only,
            defaults to 5).

    Returns:
        A list of groups (lists), without duplicates, ordered by the first
        appearance of the tag that produced them. A tag for which no
        neighbours come back contributes no group.
    """
    similar_groups = []
    # A repeated tag would produce the same group again and be discarded by
    # the duplicate check, so visiting distinct tags (first-seen order) gives
    # the same result with fewer model queries.
    for tag in dict.fromkeys(tags):
        neighbours = model.wv.most_similar(tag, topn=topn)
        # Each entry's first element is the neighbouring tag; the rest is unused.
        similar_group = [tag] + [entry[0] for entry in neighbours]
        if len(similar_group) > 1 and similar_group not in similar_groups:
            similar_groups.append(similar_group)
    return similar_groups

# Build the neighbour groups for every tag found in the dataset
similar_groups = find_similar_tags(product_tags)

# Report the groups, one per line
for similar_group in similar_groups:
    print("Similar group:", similar_group)


Recommendations for User 1: ['D']
Recommendations for User 2: ['B']
Recommendations for User 3: ['A']
Recommendations for User 4: ['C']
Recommendations for User 5: ['B']


In [9]:
from collections import defaultdict

from gensim.models import Word2Vec

# One entry per row of the ProductTag column, in row order.
product_tags = df['ProductTag'].to_list()

# Fit Word2Vec embeddings on the tag list, passed as a single training
# sentence (parameters can be adjusted as needed).
model = Word2Vec(sentences=[product_tags], vector_size=100, min_count=1)

def find_similar_tags(tags, max_groups=5, topn=5):
    """Pick, for every tag, the neighbour group that best overlaps the input tags.

    Works in three steps using the module-level ``model``:

    1. For each distinct tag, build a candidate group made of the tag plus the
       keys returned by ``model.wv.most_similar``; stop once ``max_groups``
       distinct groups exist.
    2. Index every candidate group by each of its members.
    3. For each tag, choose among the groups containing it the one sharing the
       most members with the input tags (ties go to the group built first).

    Args:
        tags: Iterable of hashable tags; repeats are allowed.
        max_groups: Upper bound on candidate groups. The bound is checked only
            after a group is added, so at least one group is kept even when
            this is zero or negative.
        topn: Number of neighbours requested per tag (defaults to 5).

    Returns:
        The distinct chosen groups (lists), ordered by the first tag that
        selected them. Tags that appear in no candidate group select nothing.
    """
    # Repeated tags yield the same candidate and the same choice, so working
    # on the distinct tags (first-seen order) produces the same result.
    unique_tags = list(dict.fromkeys(tags))
    # Built once; the overlap score below is evaluated for every candidate.
    tag_pool = set(unique_tags)

    similar_groups = []
    for tag in unique_tags:
        neighbours = model.wv.most_similar(tag, topn=topn)
        similar_group = [tag] + [entry[0] for entry in neighbours]
        if len(similar_group) > 1 and similar_group not in similar_groups:
            similar_groups.append(similar_group)
            if len(similar_groups) >= max_groups:
                break

    # Map each member (seed tag or neighbour) to the groups that contain it
    tag_to_group = defaultdict(list)
    for group in similar_groups:
        for member in group:
            tag_to_group[member].append(group)

    assigned_groups = []
    for tag in unique_tags:
        candidates = tag_to_group.get(tag)
        if not candidates:
            continue
        best_group = max(candidates, key=lambda group: len(tag_pool.intersection(group)))
        if best_group not in assigned_groups:
            assigned_groups.append(best_group)

    return assigned_groups

# Choose the best-overlapping neighbour group for every tag in the dataset
assigned_groups = find_similar_tags(product_tags)

# Report the chosen groups, one per line
for assigned_group in assigned_groups:
    print("Assigned group:", assigned_group)


  super()._check_params_vs_input(X, default_n_init=10)


Updated Recommendations for User 1: ['D']
Updated Recommendations for User 2: ['B']
Updated Recommendations for User 3: ['A']
Updated Recommendations for User 4: ['C']
Updated Recommendations for User 5: ['B']
Updated User Clusters:
User 1 belongs to Cluster 1
User 2 belongs to Cluster 1
User 3 belongs to Cluster 0
User 4 belongs to Cluster 1
User 5 belongs to Cluster 0


In [16]:
from collections import defaultdict

# Inputs from earlier cells: df (its UserID and ProductTag columns are read
# here) and assigned_groups (the list of tag groups obtained previously).

# Map each product tag to the indices of the groups that contain it
tag_to_group = defaultdict(list)
for idx, group in enumerate(assigned_groups):
    for tag in group:
        tag_to_group[tag].append(idx)  # store the group's index, not the group itself

# For each user, collect the distinct group indices reached through their tags
user_to_groups = defaultdict(list)
for user_id, raw_tags in zip(df['UserID'], df['ProductTag']):
    if pd.isna(raw_tags):
        continue  # a missing ProductTag has nothing to split
    for tag in raw_tags.split(','):  # a cell may hold several comma-separated tags
        for group_idx in tag_to_group.get(tag, ()):
            # A user with several rows or tags can reach the same group more
            # than once; record it once so group sizes are not inflated.
            if group_idx not in user_to_groups[user_id]:
                user_to_groups[user_id].append(group_idx)

# Invert the mapping: group index -> users, each user listed once per group
groups_users = defaultdict(list)
for user, groups in user_to_groups.items():
    for group in groups:
        groups_users[group].append(user)

# Sort the groups by their index
sorted_groups_users = sorted(groups_users.items(), key=lambda x: x[0])

# Print the users assigned to each group
for group, users in sorted_groups_users:
    print(f"Group {group}: {users}")


Recommendations for New User 1: ['A', 'C', 'B', 'D', 'E']


In [None]:
import pandas as pd
import random

# Value pools the synthetic rows are sampled from
tag_pool = [
    'tech', 'fashion', 'skincare', 'makeup', 'electronics', 'beauty', 'gadgets',
    'outdoor', 'sports', 'health', 'fitness', 'toys', 'books', 'kitchenware',
    'jewelry', 'watches', 'automotive', 'pets', 'travel', 'gaming', 'music',
    'art', 'photography', 'diy', 'craft', 'stationery', 'baby', 'food',
    'drinks', 'gardening', 'camping',
]
city_pool = [
    'New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix', 'Philadelphia',
    'San Antonio', 'San Diego', 'Dallas', 'San Jose', 'Austin', 'Jacksonville',
    'San Francisco', 'Indianapolis', 'Columbus', 'Fort Worth', 'Charlotte',
    'Seattle', 'Denver', 'Washington', 'Boston', 'El Paso', 'Detroit',
    'Nashville', 'Portland', 'Memphis', 'Oklahoma City',
]

# Draw 15 values per field: three-letter user ids, tags, cities, six-digit ids
user_ids = [
    ''.join(random.choices('abcdefghijklmnopqrstuvwxyz', k=3))
    for _ in range(15)
]
product_tags = random.choices(tag_pool, k=15)
city_names = random.choices(city_pool, k=15)
product_ids = [str(random.randint(100000, 999999)) for _ in range(15)]

# Assemble the new rows into a frame
data = dict(
    UserID=user_ids,
    ProductTag=product_tags,
    Region=city_names,
    ProductID=product_ids,
)
df = pd.DataFrame(data)

# Append the new rows to what CustomData.csv already holds and write it back
existing_data = pd.read_csv('CustomData.csv')
updated_data = pd.concat([existing_data, df])
updated_data.to_csv('CustomData.csv', index=False)


In [None]:
# Reload CustomData.csv from disk
df = pd.read_csv('CustomData.csv')

# tag -> indices of the assigned groups containing that tag
tag_to_group = defaultdict(list)
for group_idx, group_tags in enumerate(assigned_groups):
    for group_tag in group_tags:
        tag_to_group[group_tag].append(group_idx)

# user -> group indices reached through any of the user's tags
user_to_groups = defaultdict(list)
for _, record in df.iterrows():
    user_id = record['UserID']
    # A ProductTag cell may carry several comma-separated tags
    for piece in record['ProductTag'].split(','):
        if piece not in tag_to_group:
            continue
        user_to_groups[user_id] += tag_to_group[piece]

# group index -> distinct users (a set drops repeated memberships)
groups_users = defaultdict(set)
for user, group_ids in user_to_groups.items():
    for group_id in group_ids:
        groups_users[group_id].add(user)

# Order the groups by their index
sorted_groups_users = sorted(groups_users.items(), key=lambda item: item[0])

# Print each group's members as a list
for group, users in sorted_groups_users:
    print(f"Group {group}: {list(users)}")


In [None]:
import pandas as pd
from collections import defaultdict

# Load the data
df = pd.read_csv('CustomData.csv')

# Identifier of the user being placed; named once so both placement steps agree
new_user_id = 'xyz'
# Define the new user's region
new_user_region = 'Kolkata'

# Find users from the same region as the new user
users_from_same_region = [user for user, region in zip(df['UserID'], df['Region']) if region == new_user_region]
if users_from_same_region:
    print('User(s) from same region: ', users_from_same_region)

if not groups_users:
    # max() over an empty mapping raises ValueError; with no groups there is
    # nowhere to place the new user.
    group_with_max_users = None
    print('No groups available; new user was not assigned.')
else:
    # Find the group with the maximum users and add the new user to it
    group_with_max_users = max(groups_users, key=lambda x: len(groups_users[x]))
    groups_users[group_with_max_users] = set(groups_users[group_with_max_users]) | {new_user_id}

    # If users from the same region exist, also add the new user to the first
    # group that contains the first such user (other matches are not considered).
    if users_from_same_region:
        user_from_same_region = users_from_same_region[0]
        group_with_same_region = next(
            (group for group, users in groups_users.items() if user_from_same_region in users),
            None,
        )
        if group_with_same_region is not None:
            groups_users[group_with_same_region] = set(groups_users[group_with_same_region]) | {new_user_id}

# Sort the groups by their index
sorted_groups_users = sorted(groups_users.items(), key=lambda x: x[0])

# Print the users assigned to each group
for group, users in sorted_groups_users:
    print(f"Group {group}: {users}")


In [None]:
# One row per group: its users plus the tag group it stands for. The numeric
# group index is only needed to look up the tag group, so it is dropped.
df_groups_users = (
    pd.DataFrame(sorted_groups_users, columns=['Group', 'Users'])
    .drop(columns=['Group'])
    .assign(Assigned_Group=[assigned_groups[group_idx] for group_idx, _ in sorted_groups_users])
)
# Persist the table, then leave its first rows as the cell's displayed value
df_groups_users.to_csv('Output.csv', index=False)
df_groups_users.head()