In [12]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
# Load the input tables from the parent directory.
encoded_main_category = pd.read_csv('../encoded_main_category.csv')
# Per-user product ratings; read below via the 'userid', 'productid' and 'rating' columns.
ratings = pd.read_csv('../ratings_test.csv')
# User profiles; read below via 'userID', 'gender', 'age', 'city' and 'country'.
user_data = pd.read_csv('../user_data.csv')
# Product catalogue; read below via 'id', 'name', 'main_category_encoded',
# 'ratings' and 'no_of_ratings'.
products = pd.read_csv('../test_data.csv')

In [13]:
def calculate_similarity(user1, user2):
    """Score how alike two user profiles are, on a 0-1 scale.

    The score is a weighted sum of three components:
    gender (weight 0.2), age (weight 0.6) and location (weight 0.2).

    Parameters
    ----------
    user1, user2 : mapping-like (dict or pandas Series)
        Each must provide the keys 'gender', 'age', 'city' and 'country'.

    Returns
    -------
    float
        Similarity in [0, 1]; never NaN, even when an age is missing.
    """
    gender_sim = 1 if user1['gender'] == user2['gender'] else 0

    age1, age2 = user1['age'], user2['age']
    if pd.isna(age1) or pd.isna(age2):
        # A missing age would otherwise turn the whole score into NaN, and NaN
        # scores make any later sort by similarity unreliable.
        age_sim = 0.0
    else:
        # Clamp at 0 so an age gap above 100 cannot produce a negative term.
        age_sim = max(0.0, 1 - abs(age1 - age2) / 100)

    if user1['city'] == user2['city']:
        location_sim = 1  # Same city
    elif user1['country'] == user2['country']:
        location_sim = 0.5  # Same country, different city
    else:
        location_sim = 0  # Different country

    total_similarity = 0.2 * gender_sim + 0.6 * age_sim + 0.2 * location_sim
    return total_similarity
def find_top5_similar_users(user_info, user_data):
    """Return the 'userID' values of the five rows of user_data most similar to user_info.

    Every row of user_data is scored against user_info with
    calculate_similarity; the IDs are returned in descending score order.
    Fewer than five IDs are returned when user_data has fewer than five rows.
    """
    # (score, user id) pairs, one per row, in the frame's own row order.
    scored = [
        (calculate_similarity(user_info, row), row['userID'])
        for _, row in user_data.iterrows()
    ]
    # sorted() is stable, so rows with equal scores keep their original order.
    ranked = sorted(scored, key=lambda pair: pair[0], reverse=True)
    return [user_id for _, user_id in ranked[:5]]
# Profile of the user to generate recommendations for.
# 'interest' is a comma-separated string of main_category_encoded values.
user_info = dict(
    age=22,
    gender='female',
    city='thomasville',
    country='us',
    interest='18,19',
)
def collaborative_filtering_top5_recommendations(user_info, user_data, ratings_data, top_n=10):
    """Recommend products based on the ratings of the five most similar users.

    Steps:
      1. Pick the five users in user_data most similar to user_info.
      2. Build a user x product matrix of their mean ratings (unrated -> 0).
      3. For each of those users, walk the other users in descending cosine
         similarity and collect products the other user rated above 0 that
         this user has not rated.
      4. Rank the collected products by their highest rating anywhere in
         ratings_data and return the first top_n product IDs.

    Parameters
    ----------
    user_info : mapping
        Profile of the target user (passed to find_top5_similar_users).
    user_data : pandas.DataFrame
        User profiles (passed to find_top5_similar_users).
    ratings_data : pandas.DataFrame
        Must contain the columns 'userid', 'productid' and 'rating'.
    top_n : int, default 10
        Maximum number of product IDs to return.

    Returns
    -------
    list
        Up to top_n distinct product IDs; empty when none of the similar
        users has any ratings.
    """
    # Find the top 5 similar users
    top5_similar_users = find_top5_similar_users(user_info, user_data)
    filtered_ratings_data = ratings_data[ratings_data['userid'].isin(top5_similar_users)]
    user_item_matrix = filtered_ratings_data.pivot_table(
        index='userid',
        columns='productid',
        values='rating',
        aggfunc='mean'
    ).fillna(0)

    # None of the similar users has ratings: nothing to recommend, and an
    # empty matrix must not be handed to cosine_similarity.
    if user_item_matrix.empty:
        return []

    user_similarity = cosine_similarity(user_item_matrix)

    recommended_items = set()
    for target_user_id in top5_similar_users:
        try:
            target_user_index = user_item_matrix.index.get_loc(target_user_id)
        except KeyError:
            continue  # Skip users not present in the matrix

        target_unrated = user_item_matrix.iloc[target_user_index] == 0
        order = user_similarity[target_user_index].argsort()[::-1]
        # Drop the user's own row explicitly rather than assuming it sorts first
        # (ties at similarity 1.0 can place another user ahead of it).
        similar_users_indices = order[order != target_user_index]

        for user_index in similar_users_indices:
            rated_by_similar_user = user_item_matrix.iloc[user_index] > 0
            not_rated_by_target_user = rated_by_similar_user & target_unrated
            recommended_items.update(user_item_matrix.columns[not_rated_by_target_user][:top_n])
            # Only this user's neighbour walk stops; later users may still add
            # candidates, and the final ranking trims the pool to top_n.
            if len(recommended_items) >= top_n:
                break

    candidate_ratings = ratings_data.loc[
        ratings_data['productid'].isin(recommended_items), ['productid', 'rating']
    ]
    # Sort by rating first, then keep one row per product (its highest rating).
    # De-duplicating on (productid, rating) pairs would let the same product
    # appear several times in the returned list.
    ranked = (
        candidate_ratings
        .sort_values(by='rating', ascending=False, kind='mergesort')
        .drop_duplicates(subset='productid')
    )
    return ranked['productid'].head(top_n).tolist()
# Collaborative-filtering recommendations for the sample user, capped at 20 products.
collaborative_filtering_top5_rec = collaborative_filtering_top5_recommendations(
    user_info,
    user_data,
    ratings,
    top_n=20,
)
collaborative_filtering_top5_rec

['app250928',
 'acs106891',
 'acs245060',
 'ggf196312',
 'app253530',
 'acs157229',
 'acs010207',
 'hkt067539',
 'hkt204108',
 'acs185415',
 'acs226990',
 'ind205074',
 'ggf153700',
 'acs003283',
 'ind120012',
 'acs172405',
 'cmb091734',
 'acs068708',
 'acs227005',
 'acs050091']

In [15]:
def highest_rated_products_by_interest(user_info, product_data, top_n=20):
    """Return the best-rated products in the user's categories of interest.

    Parameters
    ----------
    user_info : mapping
        Must contain 'interest': either a comma-separated string of
        main_category_encoded values (e.g. '18,19') or an iterable of them.
        Blank entries (empty string, trailing comma, stray spaces) are ignored.
    product_data : pandas.DataFrame
        Must contain the columns 'id', 'name', 'main_category_encoded',
        'ratings' and 'no_of_ratings'.
    top_n : int, default 20
        Maximum number of products to return.

    Returns
    -------
    list of list
        One [id, name, ratings, no_of_ratings] entry per product, ordered by
        'ratings' then 'no_of_ratings', both descending. Empty when no
        product matches (including when the interest list is empty).
    """
    # Extract user interest as a list of main_category_encoded values
    raw_interest = user_info['interest']
    tokens = raw_interest.split(',') if isinstance(raw_interest, str) else raw_interest
    # Skip blank tokens so '' or '18,' does not crash int().
    user_interest = [int(token) for token in tokens if str(token).strip()]

    # Filter the product dataset for items matching the user's interests
    filtered_products = product_data[product_data['main_category_encoded'].isin(user_interest)]

    # Sort the filtered products by ratings and number of ratings in descending order
    top_products = filtered_products.sort_values(
        by=['ratings', 'no_of_ratings'], ascending=False
    ).head(top_n)

    # Return id, name, rating and rating count for each top product
    return top_products[['id', 'name', 'ratings', 'no_of_ratings']].values.tolist()
# Top 20 best-rated products in the sample user's categories of interest.
highest_rated_products_by_interest_rec = highest_rated_products_by_interest(
    user_info,
    products,
    top_n=20,
)
highest_rated_products_by_interest_rec

[['wcl147640',
  "Rushwak Women's Soft Silicone Bra Strap Cushions Holder Bra Set Strap Holder Light Weight Non-Slip Safe Shoulder Pads Free...",
  5.0,
  40.0],
 ['wcl040115',
  "Rushwak Women's Soft Silicone Bra Strap Cushions Holder Bra Set Strap Holder Light Weight Non-Slip Safe Shoulder Pads Free...",
  5.0,
  40.0],
 ['wcl006268',
  'SATPURUSH Lycra Saree Shapewear Petticoat for Women, Cotton Blended,Petticoat,Skirts for Women,Shape Wear Dress for Saree',
  5.0,
  36.0],
 ['wcl177074', "Yashika women's Art Silk Saree With Blouse Piece", 5.0, 32.0],
 ['wcl121491', "Yashika women's Art Silk Saree With Blouse Piece", 5.0, 32.0],
 ['wcl234739',
  'KTRIKSHA Crepe Printed A-Line Kurti for Women - Stylish and Comfortable Indian Ethnic Wear',
  5.0,
  28.0],
 ['wcl056855',
  'ORZIX Fashion Dressing Tape/Invisible Double-Sided Body Tape (Fabric and Skin Friendly) Double Sided Tape for Clothes Dres...',
  5.0,
  23.0],
 ['wcl023357',
  "Amayra Women's Rayon Embroidery Kurti with Palazzos a