# Dynamic Segmentation of Users in a Recommender System

In [1]:
import pandas as pd
# Load the interaction data; the preview below shows UserID, Region, ProductID and ProductTag columns
df=pd.read_csv('CustomData.csv')
# Show the first rows as a quick sanity check of the load
df.head()

Unnamed: 0,UserID,Region,ProductID,ProductTag
0,abc,New York,879652,tech
1,bde,London,345871,fashion
2,sed,Tokyo,998743,tech
3,sdj,Paris,234567,skincare
4,ikl,Los Angeles,786543,makeup


In [2]:
# Count the rows for every (ProductTag, UserID) pair ...
pair_sizes = df.groupby(['ProductTag', 'UserID']).size()
# ... and flatten the result into a regular table with a 'Count' column
grouped = pair_sizes.reset_index(name='Count')
# Show the per-pair counts
print(grouped)

     ProductTag UserID  Count
0   accessories    def      1
1           art    sdk      1
2    automotive    aub      1
3    automotive    okj      1
4          baby    rty      1
5          baby    shd      1
6        beauty    pqr      1
7        beauty    sdk      1
8         books    bde      1
9       camping    abc      1
10      camping    uuh      1
11        craft    res      1
12        craft    sgg      1
13          diy    tyu      1
14       drinks    uyt      2
15  electronics    mno      1
16  electronics    sed      1
17      fashion    bde      1
18      fashion    ikl      1
19      fitness    ikl      1
20         food    shd      1
21      gadgets    ikl      1
22       gaming    ikl      1
23    gardening    qts      1
24       health    hmu      1
25       health    mno      1
26       hiking    inn      1
27      jewelry    olk      1
28  kitchenware    okj      2
29       makeup    ikl      1
30        music    inn      1
31        music    sdj      1
32      ou

In [3]:
from gensim.models import FastText

# Flatten the ProductTag column into a plain Python list (one entry per row, duplicates included)
product_tags = df['ProductTag'].tolist()
# Train FastText model on product tags
# NOTE(review): the whole tag list is passed as ONE "sentence", so the only context the
# model can learn from is the order of rows in the CSV — confirm this is intended.
# min_count=1 keeps tags that occur only once; sg=1 selects skip-gram (per the gensim docs).
# No seed is passed, so the neighbour lists printed below may differ between runs.
model = FastText(sentences=[product_tags], min_count=1, vector_size=100, window=5, sg=1)

def find_similar_tags(tags, topn=5, embedding_model=None):
    """Build one neighbour group per distinct tag from an embedding model.

    Each group is ``[tag, neighbour_1, ..., neighbour_k]`` where the neighbours
    are the names returned by ``wv.most_similar(tag, topn=topn)``.

    Args:
        tags: Iterable of tag strings; duplicates are allowed and are queried
            only once (first occurrence decides the position in the result).
        topn: Number of neighbours requested for each tag (default 5).
        embedding_model: Object exposing ``.wv.most_similar(tag, topn=...)``.
            When omitted, the notebook-level ``model`` variable is used.

    Returns:
        List of groups in first-seen tag order. Tags for which the model
        returns no neighbours are skipped.
    """
    # Fall back to the notebook-level `model` so existing calls keep working
    vectors = (model if embedding_model is None else embedding_model).wv

    similar_groups = []
    # dict.fromkeys de-duplicates while preserving order, so each tag is queried
    # exactly once. Every group starts with its own (distinct) tag, hence no two
    # groups can be equal and no "already present" check is needed.
    for tag in dict.fromkeys(tags):
        neighbours = vectors.most_similar(tag, topn=topn)
        similar_group = [tag] + [name for name, _score in neighbours]
        if len(similar_group) > 1:
            similar_groups.append(similar_group)
    return similar_groups

# Build the neighbour groups for every tag in the dataset
similar_groups = find_similar_tags(product_tags)

# Print one line per group
for similar_group in similar_groups:
    print("Similar group:", similar_group)


Similar group: ['tech', 'toys', 'gaming', 'kitchenware', 'sports', 'accessories']
Similar group: ['fashion', 'outdoor', 'baby', 'art', 'health', 'music']
Similar group: ['skincare', 'photography', 'kitchenware', 'makeup', 'gardening', 'books']
Similar group: ['makeup', 'health', 'stationery', 'skincare', 'art', 'gaming']
Similar group: ['electronics', 'music', 'outdoor', 'food', 'watches', 'travel']
Similar group: ['beauty', 'drinks', 'music', 'gardening', 'camping', 'hiking']
Similar group: ['accessories', 'camping', 'gardening', 'diy', 'drinks', 'hiking']
Similar group: ['gadgets', 'craft', 'sports', 'fitness', 'music', 'pets']
Similar group: ['outdoor', 'music', 'fashion', 'kitchenware', 'gardening', 'baby']
Similar group: ['sports', 'pets', 'automotive', 'gadgets', 'toys', 'music']
Similar group: ['health', 'gardening', 'food', 'hiking', 'art', 'makeup']
Similar group: ['fitness', 'stationery', 'automotive', 'gadgets', 'music', 'sports']
Similar group: ['toys', 'diy', 'tech', 'spor

In [4]:
from collections import defaultdict

from gensim.models import Word2Vec

# Rebuild the flat tag list (one entry per row of df, duplicates included)
product_tags = df['ProductTag'].tolist()

# Train Word2Vec on the same single-"sentence" corpus. This rebinds `model`,
# replacing the FastText model created in the previous cell.
model = Word2Vec([product_tags], min_count=1, vector_size=100)  # Adjust parameters as needed

def find_similar_tags(tags, max_groups=5, topn=5, embedding_model=None):
    """Build up to ``max_groups`` neighbour groups and keep the best one per tag.

    Step 1: walk the distinct tags in first-seen order and create the group
    ``[tag, neighbour_1, ..., neighbour_k]`` from ``wv.most_similar``; stop as
    soon as ``max_groups`` groups exist.
    Step 2: for every input tag that appears in at least one group, pick the
    group sharing the most members with the input tags (ties go to the group
    created first) and collect the distinct winners.

    Note that "most similar group" is decided by this overlap count, not by an
    embedding distance.

    Args:
        tags: Iterable of tag strings; duplicates are allowed.
        max_groups: Stop creating groups once this many exist (default 5).
        topn: Number of neighbours requested for each tag (default 5).
        embedding_model: Object exposing ``.wv.most_similar(tag, topn=...)``.
            When omitted, the notebook-level ``model`` variable is used.

    Returns:
        List of the selected groups, in the order they were first selected.
    """
    # Fall back to the notebook-level `model` so existing calls keep working
    vectors = (model if embedding_model is None else embedding_model).wv

    # Duplicated tags always reproduce the same group, so process each tag once
    unique_tags = list(dict.fromkeys(tags))

    similar_groups = []
    for tag in unique_tags:
        neighbours = vectors.most_similar(tag, topn=topn)
        similar_group = [tag] + [name for name, _score in neighbours]
        if len(similar_group) > 1:
            similar_groups.append(similar_group)
            if len(similar_groups) >= max_groups:  # group budget exhausted
                break

    # Map every member (seed tag or neighbour) to the groups that contain it
    tag_to_group = defaultdict(list)
    for group in similar_groups:
        for member in group:
            tag_to_group[member].append(group)

    # Hoisted out of the loop: the input tag set is the same for every candidate
    tag_set = set(unique_tags)

    assigned_groups = []
    for tag in unique_tags:
        # .get avoids inserting empty entries for tags that are in no group
        candidates = tag_to_group.get(tag)
        if candidates:
            most_similar_group = max(candidates, key=lambda g: len(tag_set.intersection(g)))
            if most_similar_group not in assigned_groups:
                assigned_groups.append(most_similar_group)

    return assigned_groups

# Select the groups that will be used for user segmentation
assigned_groups = find_similar_tags(product_tags)

# Print one line per selected group
for assigned_group in assigned_groups:
    print("Assigned group:", assigned_group)


Assigned group: ['tech', 'toys', 'beauty', 'diy', 'food', 'watches']
Assigned group: ['fashion', 'fitness', 'gardening', 'photography', 'books', 'music']
Assigned group: ['skincare', 'camping', 'pets', 'books', 'makeup', 'photography']
Assigned group: ['electronics', 'baby', 'craft', 'music', 'books', 'jewelry']
Assigned group: ['makeup', 'photography', 'watches', 'kitchenware', 'skincare', 'craft']


In [5]:
from collections import defaultdict

# Assuming df contains user data with columns UserID and ProductTag
# and assigned_groups contains the assigned groups as obtained previously

# Create a dictionary to map each product tag to the index/indices of the group(s) containing it
tag_to_group = defaultdict(list)
for idx, group in enumerate(assigned_groups):
    for tag in group:
        tag_to_group[tag].append(idx)  # Store the group's position in assigned_groups, not the group itself

# Initialize a dictionary to store the group indices each user belongs to
user_to_groups = defaultdict(list)

# Iterate through each row and record the groups reached through that row's ProductTag
for index, row in df.iterrows():
    user_id = row['UserID']
    # NOTE: this rebinds the notebook-level `product_tags` list to the tags of the current row
    product_tags = row['ProductTag'].split(',')  # Split tags if they are comma-separated
    for tag in product_tags:
        if tag in tag_to_group:  # tags that belong to no assigned group are ignored
            user_to_groups[user_id].extend(tag_to_group[tag])

# Organize users into groups
# NOTE(review): lists are used, so a user whose rows hit the same group more than once is
# listed repeatedly (e.g. 'bde' twice in Group 1 in the output below).
groups_users = defaultdict(list)
for user, groups in user_to_groups.items():
    for group in groups:
        groups_users[group].append(user)

# Sort the groups by their index (the keys are positions in assigned_groups)
sorted_groups_users = sorted(groups_users.items(), key=lambda x: x[0])

# Print the users assigned to each group
for group, users in sorted_groups_users:
    print(f"Group {group}: {users}")


Group 0: ['abc', 'sed', 'pqr', 'sdk', 'hmu', 'ukl', 'ahj', 'tyu', 'shd']
Group 1: ['bde', 'bde', 'sdj', 'ikl', 'ikl', 'inn', 'ihk', 'bhu', 'qts']
Group 2: ['abc', 'abc', 'bde', 'sdj', 'ikl', 'ahj', 'ihk', 'bhu', 'uuh']
Group 3: ['bde', 'sed', 'sdj', 'mno', 'olk', 'inn', 'sgg', 'res', 'rty', 'shd']
Group 4: ['abc', 'sdj', 'ikl', 'okj', 'okj', 'ahj', 'ihk', 'bhu', 'sgg', 'res']


In [6]:
import pandas as pd
import random

# Pools the synthetic rows are sampled from
USER_ID_ALPHABET = 'abcdefghijklmnopqrstuvwxyz'
TAG_POOL = ['tech', 'fashion', 'skincare', 'makeup', 'electronics', 'beauty', 'gadgets', 'outdoor', 'sports', 'health', 'fitness', 'toys', 'books', 'kitchenware', 'jewelry', 'watches', 'automotive', 'pets', 'travel', 'gaming', 'music', 'art', 'photography', 'diy', 'craft', 'stationery', 'baby', 'food', 'drinks', 'gardening', 'camping']
CITY_POOL = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix', 'Philadelphia', 'San Antonio', 'San Diego', 'Dallas', 'San Jose', 'Austin', 'Jacksonville', 'San Francisco', 'Indianapolis', 'Columbus', 'Fort Worth', 'Charlotte', 'Seattle', 'Denver', 'Washington', 'Boston', 'El Paso', 'Detroit', 'Nashville', 'Portland', 'Memphis', 'Oklahoma City']

# Draw 15 values per field. The order of the random calls is kept as
# user ids -> tags -> cities -> product ids. No seed is set, so every run differs.
user_ids = [''.join(random.choices(USER_ID_ALPHABET, k=3)) for _ in range(15)]
product_tags = random.choices(TAG_POOL, k=15)
city_names = random.choices(CITY_POOL, k=15)
product_ids = [str(random.randint(100000, 999999)) for _ in range(15)]

# Assemble the new rows (this rebinds `df` to the 15 synthetic rows only)
data = {
    'UserID': user_ids,
    'ProductTag': product_tags,
    'Region': city_names,
    'ProductID': product_ids,
}
df = pd.DataFrame(data)

# Append the new rows to what is already stored on disk and write the file back.
# NOTE: every execution of this cell grows CustomData.csv by another 15 rows.
existing_data = pd.read_csv('CustomData.csv')
updated_data = pd.concat([existing_data, df])
updated_data.to_csv('CustomData.csv', index=False)


In [7]:
# Reload the dataset, which now includes the synthetic rows appended above
df = pd.read_csv('CustomData.csv')

# tag -> indices (positions in assigned_groups) of every group containing that tag
tag_to_group = defaultdict(list)
for group_idx, group_tags in enumerate(assigned_groups):
    for group_tag in group_tags:
        tag_to_group[group_tag].append(group_idx)

# user -> group indices reached through each of the user's rows
user_to_groups = defaultdict(list)
for user_id, raw_tags in zip(df['UserID'], df['ProductTag']):
    # A ProductTag cell may hold several comma-separated tags
    for tag in raw_tags.split(','):
        if tag in tag_to_group:
            user_to_groups[user_id].extend(tag_to_group[tag])

# group index -> distinct members (a set, so repeated hits collapse to one entry)
groups_users = defaultdict(set)
for member, group_indices in user_to_groups.items():
    for group_idx in group_indices:
        groups_users[group_idx].add(member)

# Order the groups by their index
sorted_groups_users = sorted(groups_users.items(), key=lambda item: item[0])

# Print the members of each group as a list
for group, users in sorted_groups_users:
    print(f"Group {group}: {list(users)}")


Group 0: ['sdk', 'shd', 'pqr', 'tyu', 'szz', 'xps', 'ahj', 'sgh', 'sed', 'ukl', 'ixu', 'abc', 'hmu']
Group 1: ['hqd', 'ihk', 'bde', 'sdj', 'gbf', 'inn', 'ikl', 'bhu', 'tny', 'qts']
Group 2: ['uuh', 'ihk', 'bde', 'sdj', 'ikl', 'ahj', 'bhu', 'abc']
Group 3: ['hqd', 'shd', 'bde', 'sdj', 'wjq', 'inn', 'ldh', 'olk', 'sed', 'mno', 'sgg', 'res', 'rty', 'tny', 'pmj']
Group 4: ['okj', 'ihk', 'sdj', 'ikl', 'ahj', 'bhu', 'sgg', 'res', 'abc', 'pmj', 'pgk']


In [8]:
import pandas as pd
from collections import defaultdict

# Reload the dataset from disk
df = pd.read_csv('CustomData.csv')

# Region of the user being onboarded
new_user_region = 'Kolkata'

# Key of the group that currently has the most members
group_with_max_users = max(groups_users, key=lambda g: len(groups_users[g]))

# Existing users whose Region matches the new user's region
users_from_same_region = df.loc[df['Region'] == new_user_region, 'UserID'].tolist()
if users_from_same_region:
    print('User(s) from same region: ', users_from_same_region)

# Put the new user 'xyz' into the largest group (working on a copy of its member set)
largest_group_members = set(groups_users[group_with_max_users])
largest_group_members.add('xyz')
groups_users[group_with_max_users] = largest_group_members

# When a same-region user exists, also put 'xyz' into the first group containing that user
if users_from_same_region:
    user_from_same_region = users_from_same_region[0]  # Assuming only one user from the same region
    group_with_same_region = None
    for candidate_group, candidate_members in groups_users.items():
        if user_from_same_region in candidate_members:
            group_with_same_region = candidate_group
            break
    if group_with_same_region is not None:
        same_region_members = set(groups_users[group_with_same_region])
        same_region_members.add('xyz')
        groups_users[group_with_same_region] = same_region_members

# Order the groups by their index
sorted_groups_users = sorted(groups_users.items(), key=lambda item: item[0])

# Print the members of each group
for group, users in sorted_groups_users:
    print(f"Group {group}: {users}")


User(s) from same region:  ['okj']
Group 0: {'sdk', 'shd', 'pqr', 'tyu', 'szz', 'xps', 'ahj', 'sgh', 'sed', 'ukl', 'ixu', 'abc', 'hmu'}
Group 1: {'hqd', 'ihk', 'bde', 'sdj', 'gbf', 'inn', 'ikl', 'bhu', 'tny', 'qts'}
Group 2: {'uuh', 'ihk', 'bde', 'sdj', 'ikl', 'ahj', 'bhu', 'abc'}
Group 3: {'hqd', 'shd', 'xyz', 'bde', 'sdj', 'wjq', 'inn', 'ldh', 'olk', 'sed', 'mno', 'sgg', 'res', 'rty', 'tny', 'pmj'}
Group 4: {'okj', 'ihk', 'xyz', 'sdj', 'ikl', 'ahj', 'bhu', 'sgg', 'res', 'abc', 'pmj', 'pgk'}


In [52]:
# One row per group, starting from the (group index, member set) pairs
df_groups_users= pd.DataFrame(sorted_groups_users, columns=['Group', 'Users'])
# Drop the numeric index; the tag list added below identifies each group instead
df_groups_users= df_groups_users.drop(columns=['Group'])
# Look up every group's tag list by its index into assigned_groups
df_groups_users['Assigned_Group'] = [assigned_groups[group] for group, _ in sorted_groups_users]
# Persist the table.
# NOTE(review): the cells hold Python sets/lists, so the CSV presumably stores their
# string form and would need parsing when read back — confirm.
df_groups_users.to_csv('Output.csv', index=False)
# Preview the result
df_groups_users.head()

Unnamed: 0,Users,Assigned_Group
0,"{sdk, shd, pqr, tyu, szz, xps, ahj, sgh, sed, ...","[tech, toys, beauty, diy, food, watches]"
1,"{hqd, ihk, bde, sdj, gbf, inn, ikl, bhu, tny, ...","[fashion, fitness, gardening, photography, boo..."
2,"{uuh, ihk, bde, sdj, ikl, ahj, bhu, abc}","[skincare, camping, pets, books, makeup, photo..."
3,"{hqd, shd, xyz, bde, sdj, wjq, inn, ldh, olk, ...","[electronics, baby, craft, music, books, jewelry]"
4,"{okj, ihk, xyz, sdj, ikl, ahj, bhu, sgg, res, ...","[makeup, photography, watches, kitchenware, sk..."


In [53]:
import pandas as pd
from Crypto.Cipher import DES
from Crypto.Random import get_random_bytes
import base64

def pad(data, block_size=8):
    """Append PKCS#7-style padding so ``len(result)`` is a multiple of ``block_size``.

    Between 1 and ``block_size`` bytes are always added, each holding the
    number of padding bytes, so the padding can be removed unambiguously.

    Args:
        data: Bytes to pad.
        block_size: Cipher block size in bytes (default 8, the DES block size).

    Returns:
        ``data`` followed by the padding bytes.

    Raises:
        ValueError: If ``block_size`` is outside 1..255 (the pad length must
            fit into a single byte).
    """
    if not 1 <= block_size <= 255:
        raise ValueError("block_size must be between 1 and 255")
    length = block_size - (len(data) % block_size)
    return data + bytes([length]) * length

def unpad(data):
    """Remove the padding added by ``pad`` and return the original bytes.

    The last byte gives the number of padding bytes; all of them must carry
    that same value.

    Args:
        data: Padded bytes (typically the output of a block-cipher decrypt).

    Returns:
        ``data`` without its trailing padding.

    Raises:
        ValueError: If ``data`` is empty or the padding is malformed, which
            usually means a wrong key or corrupted ciphertext. (Without this
            check a trailing zero byte would silently yield ``b""``.)
    """
    if not data:
        raise ValueError("cannot unpad empty data")
    pad_len = data[-1]
    if pad_len < 1 or pad_len > len(data):
        raise ValueError("invalid padding length")
    if data[-pad_len:] != bytes([pad_len]) * pad_len:
        raise ValueError("invalid padding bytes")
    return data[:-pad_len]

def encrypt_data(key, data):
    """Pad ``data`` (bytes), encrypt it with DES in ECB mode and return base64 bytes.

    NOTE(review): single DES has a short key and ECB encrypts equal plaintext
    blocks to equal ciphertext blocks; treat this as a demonstration only.
    """
    ecb_cipher = DES.new(key, DES.MODE_ECB)
    return base64.b64encode(ecb_cipher.encrypt(pad(data)))

def decrypt_data(key, data):
    """Base64-decode ``data``, decrypt it with DES in ECB mode and strip the padding.

    Returns the plaintext as bytes.
    """
    ecb_cipher = DES.new(key, DES.MODE_ECB)
    return unpad(ecb_cipher.decrypt(base64.b64decode(data)))

def generate_des_key():
    """Return 8 bytes from ``get_random_bytes`` for use as a DES key."""
    key_length = 8
    return get_random_bytes(key_length)

# Fresh key for this run. It is only held in memory, so the file written below
# can only be decrypted while `des_key` is still alive.
des_key = generate_des_key()

# Encrypt every cell (through its str() form), column by column
df_encrypted = pd.DataFrame()
for col_name in df_groups_users.columns:
    plain_column = df_groups_users[col_name]
    df_encrypted[col_name] = plain_column.apply(
        lambda cell: encrypt_data(des_key, str(cell).encode()).decode()
    )

# Write the ciphertext table to Excel
df_encrypted.to_excel('Encrypted_GroupedUsers.xlsx', index=False)

print("Data encrypted and saved to Encrypted_GroupedUsers.xlsx")

# Round trip: load the ciphertext table back from Excel
df_encrypted_read = pd.read_excel('Encrypted_GroupedUsers.xlsx')

# Decrypt every cell in place; the result is the str() form of the original values
for col_name in df_encrypted_read.columns:
    cipher_column = df_encrypted_read[col_name]
    df_encrypted_read[col_name] = cipher_column.apply(
        lambda cell: decrypt_data(des_key, cell).decode()
    )

print("Data decrypted:")
print(df_encrypted_read)


Data encrypted and saved to Encrypted_GroupedUsers.xlsx
Data decrypted:
                                               Users  \
0  {'sdk', 'shd', 'pqr', 'tyu', 'szz', 'xps', 'ah...   
1  {'hqd', 'ihk', 'bde', 'sdj', 'gbf', 'inn', 'ik...   
2  {'uuh', 'ihk', 'bde', 'sdj', 'ikl', 'ahj', 'bh...   
3  {'hqd', 'shd', 'xyz', 'bde', 'sdj', 'wjq', 'in...   
4  {'okj', 'ihk', 'xyz', 'sdj', 'ikl', 'ahj', 'bh...   

                                      Assigned_Group  
0  ['tech', 'toys', 'beauty', 'diy', 'food', 'wat...  
1  ['fashion', 'fitness', 'gardening', 'photograp...  
2  ['skincare', 'camping', 'pets', 'books', 'make...  
3  ['electronics', 'baby', 'craft', 'music', 'boo...  
4  ['makeup', 'photography', 'watches', 'kitchenw...  


In [54]:
# Re-display the plaintext table; the encryption cell wrote to df_encrypted and left df_groups_users untouched
df_groups_users.head()

Unnamed: 0,Users,Assigned_Group
0,"{sdk, shd, pqr, tyu, szz, xps, ahj, sgh, sed, ...","[tech, toys, beauty, diy, food, watches]"
1,"{hqd, ihk, bde, sdj, gbf, inn, ikl, bhu, tny, ...","[fashion, fitness, gardening, photography, boo..."
2,"{uuh, ihk, bde, sdj, ikl, ahj, bhu, abc}","[skincare, camping, pets, books, makeup, photo..."
3,"{hqd, shd, xyz, bde, sdj, wjq, inn, ldh, olk, ...","[electronics, baby, craft, music, books, jewelry]"
4,"{okj, ihk, xyz, sdj, ikl, ahj, bhu, sgg, res, ...","[makeup, photography, watches, kitchenware, sk..."


In [60]:
import pandas as pd
from Crypto.Cipher import AES
from Crypto.Random import get_random_bytes
import base64

def pad(data, block_size=None):
    """Append PKCS#7-style padding so ``len(result)`` is a multiple of ``block_size``.

    Between 1 and ``block_size`` bytes are always added, each holding the
    number of padding bytes.

    Args:
        data: Bytes to pad.
        block_size: Block size in bytes; defaults to ``AES.block_size``.

    Returns:
        ``data`` followed by the padding bytes.

    Raises:
        ValueError: If ``block_size`` is outside 1..255 (the pad length must
            fit into a single byte).
    """
    if block_size is None:
        block_size = AES.block_size
    if not 1 <= block_size <= 255:
        raise ValueError("block_size must be between 1 and 255")
    length = block_size - (len(data) % block_size)
    return data + bytes([length]) * length

def unpad(data):
    """Remove the padding added by ``pad`` and return the original bytes.

    The last byte gives the number of padding bytes; all of them must carry
    that same value.

    Args:
        data: Padded bytes (typically the output of a block-cipher decrypt).

    Returns:
        ``data`` without its trailing padding.

    Raises:
        ValueError: If ``data`` is empty or the padding is malformed, which
            usually means a wrong key or corrupted ciphertext. (Without this
            check a trailing zero byte would silently yield ``b""``.)
    """
    if not data:
        raise ValueError("cannot unpad empty data")
    pad_len = data[-1]
    if pad_len < 1 or pad_len > len(data):
        raise ValueError("invalid padding length")
    if data[-pad_len:] != bytes([pad_len]) * pad_len:
        raise ValueError("invalid padding bytes")
    return data[:-pad_len]

def encrypt_data(key, data):
    """Encrypt the string ``data`` with AES in ECB mode and return a base64 string.

    The text is encoded to bytes, padded, encrypted and base64-encoded.

    NOTE(review): ECB encrypts equal plaintext blocks to equal ciphertext
    blocks and provides no integrity check; treat this as a demonstration.
    """
    ecb_cipher = AES.new(key, AES.MODE_ECB)
    token = base64.b64encode(ecb_cipher.encrypt(pad(data.encode())))
    return token.decode()

def decrypt_data(key, data):
    """Reverse ``encrypt_data``: base64 string in, decrypted text out.

    The input is base64-decoded, decrypted with AES in ECB mode, unpadded and
    decoded back to ``str``.
    """
    ecb_cipher = AES.new(key, AES.MODE_ECB)
    ciphertext = base64.b64decode(data.encode())
    return unpad(ecb_cipher.decrypt(ciphertext)).decode()

def generate_aes_key():
    """Return 16 bytes from ``get_random_bytes`` for use as a 128-bit AES key."""
    key_length = 16
    return get_random_bytes(key_length)

# Fresh key for this run. It is only held in memory, so the file written below
# can only be decrypted while `aes_key` is still alive.
aes_key = generate_aes_key()

# Encrypt every cell (through its str() form) and collect the columns into a new frame
df_encrypted = pd.DataFrame({
    col_name: df_groups_users[col_name].apply(lambda cell: encrypt_data(aes_key, str(cell)))
    for col_name in df_groups_users.columns
})

# Write the ciphertext table to Excel (overwrites any earlier file of that name)
df_encrypted.to_excel('Encrypted_GroupedUsers.xlsx', index=False)

print("Data encrypted and saved to Encrypted_GroupedUsers.xlsx")

# Round trip: load the ciphertext table back from Excel
df_encrypted_read = pd.read_excel('Encrypted_GroupedUsers.xlsx')

# Decrypt every cell in place; the result is the str() form of the original values
for col_name in df_encrypted_read.columns:
    cipher_column = df_encrypted_read[col_name]
    df_encrypted_read[col_name] = cipher_column.apply(
        lambda cell: decrypt_data(aes_key, str(cell))
    )

print("Data decrypted:")
print(df_encrypted_read)


Data encrypted and saved to Encrypted_GroupedUsers.xlsx
Data decrypted:
                                               Users  \
0  {'sdk', 'shd', 'pqr', 'tyu', 'szz', 'xps', 'ah...   
1  {'hqd', 'ihk', 'bde', 'sdj', 'gbf', 'inn', 'ik...   
2  {'uuh', 'ihk', 'bde', 'sdj', 'ikl', 'ahj', 'bh...   
3  {'hqd', 'shd', 'xyz', 'bde', 'sdj', 'wjq', 'in...   
4  {'okj', 'ihk', 'xyz', 'sdj', 'ikl', 'ahj', 'bh...   

                                      Assigned_Group  
0  ['tech', 'toys', 'beauty', 'diy', 'food', 'wat...  
1  ['fashion', 'fitness', 'gardening', 'photograp...  
2  ['skincare', 'camping', 'pets', 'books', 'make...  
3  ['electronics', 'baby', 'craft', 'music', 'boo...  
4  ['makeup', 'photography', 'watches', 'kitchenw...  


In [64]:
import pandas as pd
from Crypto.Cipher import AES
from Crypto.PublicKey import ECC
from Crypto.Random import get_random_bytes
import base64

def pad(data, block_size=None):
    """Append PKCS#7-style padding so ``len(result)`` is a multiple of ``block_size``.

    Between 1 and ``block_size`` bytes are always added, each holding the
    number of padding bytes.

    Args:
        data: Bytes to pad.
        block_size: Block size in bytes; defaults to ``AES.block_size``.

    Returns:
        ``data`` followed by the padding bytes.

    Raises:
        ValueError: If ``block_size`` is outside 1..255 (the pad length must
            fit into a single byte).
    """
    if block_size is None:
        block_size = AES.block_size
    if not 1 <= block_size <= 255:
        raise ValueError("block_size must be between 1 and 255")
    length = block_size - (len(data) % block_size)
    return data + bytes([length]) * length

def unpad(data):
    """Remove the padding added by ``pad`` and return the original bytes.

    The last byte gives the number of padding bytes; all of them must carry
    that same value.

    Args:
        data: Padded bytes (typically the output of a block-cipher decrypt).

    Returns:
        ``data`` without its trailing padding.

    Raises:
        ValueError: If ``data`` is empty or the padding is malformed, which
            usually means a wrong key or corrupted ciphertext. (Without this
            check a trailing zero byte would silently yield ``b""``.)
    """
    if not data:
        raise ValueError("cannot unpad empty data")
    pad_len = data[-1]
    if pad_len < 1 or pad_len > len(data):
        raise ValueError("invalid padding length")
    if data[-pad_len:] != bytes([pad_len]) * pad_len:
        raise ValueError("invalid padding bytes")
    return data[:-pad_len]

def encrypt_data_aes(key, data):
    """Encrypt the string ``data`` with AES in ECB mode and return a base64 string.

    The text is encoded to bytes, padded, encrypted and base64-encoded.

    NOTE(review): ECB encrypts equal plaintext blocks to equal ciphertext
    blocks and provides no integrity check; treat this as a demonstration.
    """
    ecb_cipher = AES.new(key, AES.MODE_ECB)
    token = base64.b64encode(ecb_cipher.encrypt(pad(data.encode())))
    return token.decode()

def decrypt_data_aes(key, data):
    """Reverse ``encrypt_data_aes``: base64 string in, decrypted text out.

    The input is base64-decoded, decrypted with AES in ECB mode, unpadded and
    decoded back to ``str``.
    """
    ecb_cipher = AES.new(key, AES.MODE_ECB)
    ciphertext = base64.b64decode(data.encode())
    return unpad(ecb_cipher.decrypt(ciphertext)).decode()

def generate_aes_key():
    """Return 16 bytes from ``get_random_bytes`` for use as a 128-bit AES key."""
    key_length = 16
    return get_random_bytes(key_length)

def generate_ecc_key_pair():
    """Generate a P-256 ECC key and return ``(private_key, public_key)`` in PEM format."""
    ecc_key = ECC.generate(curve='P-256')
    return (
        ecc_key.export_key(format='PEM'),
        ecc_key.public_key().export_key(format='PEM'),
    )

# Create a new DataFrame to store encrypted data
df_encrypted = pd.DataFrame()

# Generate AES key (held in memory only; needed again for the decryption below)
aes_key = generate_aes_key()

# Generate ECC key pair
# NOTE(review): private_key / public_key are not used anywhere else in this cell, so the
# data below is protected by AES only. The key pair was presumably meant to wrap
# aes_key — confirm.
private_key, public_key = generate_ecc_key_pair()

# Encrypt each cell (through its str() form) using AES and store in df_encrypted
for column in df_groups_users.columns:
    df_encrypted[column] = df_groups_users[column].apply(lambda value: encrypt_data_aes(aes_key, str(value)))

# Save the encrypted DataFrame to an Excel file
df_encrypted.to_excel('Encrypted_GroupedUsers.xlsx', index=False)

# NOTE(review): the message mentions ECC although only AES was applied above
print("Data encrypted with AES and ECC and saved to Encrypted_GroupedUsers.xlsx")

# Read the encrypted DataFrame from Excel
df_encrypted_read = pd.read_excel('Encrypted_GroupedUsers.xlsx')

# Decrypt the data in place; the result is the str() form of the original values
for column in df_encrypted_read.columns:
    df_encrypted_read[column] = df_encrypted_read[column].apply(lambda value: decrypt_data_aes(aes_key, value))

print("Data decrypted:")
print(df_encrypted_read)


Data encrypted with AES and ECC and saved to Encrypted_GroupedUsers.xlsx
Data decrypted:
                                               Users  \
0  {'sdk', 'shd', 'pqr', 'tyu', 'szz', 'xps', 'ah...   
1  {'hqd', 'ihk', 'bde', 'sdj', 'gbf', 'inn', 'ik...   
2  {'uuh', 'ihk', 'bde', 'sdj', 'ikl', 'ahj', 'bh...   
3  {'hqd', 'shd', 'xyz', 'bde', 'sdj', 'wjq', 'in...   
4  {'okj', 'ihk', 'xyz', 'sdj', 'ikl', 'ahj', 'bh...   

                                      Assigned_Group  
0  ['tech', 'toys', 'beauty', 'diy', 'food', 'wat...  
1  ['fashion', 'fitness', 'gardening', 'photograp...  
2  ['skincare', 'camping', 'pets', 'books', 'make...  
3  ['electronics', 'baby', 'craft', 'music', 'boo...  
4  ['makeup', 'photography', 'watches', 'kitchenw...  
