In [3]:
!pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
     ---------------------------------------- 0.0/154.4 kB ? eta -:--:--
     ------- ------------------------------- 30.7/154.4 kB 1.3 MB/s eta 0:00:01
     --------- --------------------------- 41.0/154.4 kB 279.3 kB/s eta 0:00:01
     -------------- ---------------------- 61.4/154.4 kB 363.1 kB/s eta 0:00:01
     ---------------------- -------------- 92.2/154.4 kB 403.5 kB/s eta 0:00:01
     ---------------------- -------------- 92.2/154.4 kB 403.5 kB/s eta 0:00:01
     ---------------------------- ------- 122.9/154.4 kB 379.3 kB/s eta 0:00:01
     -----------------------------------  153.6/154.4 kB 416.7 kB/s eta 0:00:01
     ------------------------------------ 154.4/154.4 kB 400.5 kB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: still running...
  Installing

In [4]:
import pandas as pd
import numpy as np
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, GridSearchCV
from surprise.accuracy import rmse, mae
from sklearn.metrics import ndcg_score, precision_score, recall_score

# Generate synthetic users
def generate_users(n=1000):
    return pd.DataFrame({
        'user_id': range(1, n+1),
        'age': np.random.randint(18, 60, n),
        'location': np.random.choice(['NY', 'LA', 'SF', 'TX', 'CHI'], n),
    })

# Generate synthetic products
def generate_products(n=500):
    categories = ['Electronics', 'Fashion', 'Home', 'Beauty', 'Sports']
    return pd.DataFrame({
        'product_id': range(1, n+1),
        'name': [f'Product {i}' for i in range(1, n+1)],
        'category': np.random.choice(categories, n),
        'description': [f'Description of product {i}' for i in range(1, n+1)],
        'premium': np.random.choice([0, 1], n, p=[0.9, 0.1])  # 10% premium products
    })

# Generate synthetic interactions with social engagement
def generate_interactions(users, products, n=5000):
    interactions = []
    for _ in range(n):
        interactions.append({
            'user_id': random.choice(users['user_id'].values),
            'product_id': random.choice(products['product_id'].values),
            'rating': random.randint(1, 5),
            'liked': random.choice([0, 1]),
            'shared': random.choice([0, 1]),
        })
    return pd.DataFrame(interactions)

# Create datasets
users_df = generate_users()
products_df = generate_products()
interactions_df = generate_interactions(users_df, products_df)

# Implement Content-Based Filtering
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(products_df['description'])
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

def recommend_products_content_based(product_id, top_n=5):
    idx = products_df.index[products_df['product_id'] == product_id].tolist()[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:top_n+1]
    product_indices = [i[0] for i in sim_scores]
    return products_df.iloc[product_indices]

# Implement Collaborative Filtering (Hyperparameter Tuning)
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(interactions_df[['user_id', 'product_id', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2)

param_grid = {'n_factors': [50, 100], 'n_epochs': [10, 20], 'lr_all': [0.002, 0.005], 'reg_all': [0.02, 0.1]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)

best_params = gs.best_params['rmse']
print("Best Parameters:", best_params)

model = SVD(**best_params)
model.fit(trainset)
predictions = model.test(testset)

print("RMSE:", rmse(predictions))
print("MAE:", mae(predictions))


def recommend_products_collaborative(user_id, top_n=5):
    product_ids = products_df['product_id'].unique()
    predictions = [model.predict(user_id, pid) for pid in product_ids]
    predictions.sort(key=lambda x: x.est, reverse=True)
    recommended_ids = [pred.iid for pred in predictions[:top_n]]
    return products_df[products_df['product_id'].isin(recommended_ids)]

# Implement Bias Mitigation in Ranking
def adjust_ranking_bias(recommendations, category_boost={'Fashion': 1.2, 'Electronics': 1.1}):
    recommendations['bias_adjustment'] = recommendations['category'].map(category_boost).fillna(1)
    recommendations['final_score'] = recommendations['bias_adjustment'] + 1
    recommendations = recommendations.sort_values(by='final_score', ascending=False)
    return recommendations.drop(columns=['bias_adjustment', 'final_score'])

# Cold Start Handling for New Users
def recommend_for_new_users(top_n=5):
    popular_products = interactions_df.groupby('product_id').size().reset_index(name='count')
    popular_products = popular_products.sort_values(by='count', ascending=False).head(top_n)
    return products_df[products_df['product_id'].isin(popular_products['product_id'])]

# Cold Start Handling for New Products
def recommend_new_products(top_n=5):
    latest_products = products_df.sort_values(by='product_id', ascending=False).head(top_n)
    return latest_products

# Integrate Social Media Engagement
def recommend_with_social_boost(user_id, top_n=5, like_weight=0.2, share_weight=0.3):
    recommendations = recommend_products_collaborative(user_id, top_n * 2)
    recommendations = adjust_ranking_bias(recommendations)
    interactions = interactions_df[interactions_df['user_id'] == user_id]
    engagement_scores = interactions.groupby('product_id').agg({'liked': 'sum', 'shared': 'sum'}).reset_index()
    engagement_scores['score'] = (engagement_scores['liked'] * like_weight) + (engagement_scores['shared'] * share_weight)
    recommendations = recommendations.merge(engagement_scores, on='product_id', how='left').fillna(0)
    recommendations['final_score'] = recommendations['score'] + 1
    recommendations = recommendations.sort_values(by='final_score', ascending=False).head(top_n)
    return recommendations.drop(columns=['score', 'final_score'])

# Example recommendations
sample_product_id = 10
sample_user_id = 5
print(f"Content-Based Recommendations for Product {sample_product_id}:")
print(recommend_products_content_based(sample_product_id))
print(f"Collaborative Recommendations for User {sample_user_id}:")
print(recommend_products_collaborative(sample_user_id))
print(f"Social-Boosted Recommendations with Bias Mitigation for User {sample_user_id}:")
print(recommend_with_social_boost(sample_user_id))
print("Recommendations for New Users:")
print(recommend_for_new_users())
print("Recommendations for New Products:")
print(recommend_new_products())


Best Parameters: {'n_factors': 50, 'n_epochs': 10, 'lr_all': 0.002, 'reg_all': 0.1}
RMSE: 1.4260
RMSE: 1.4260140070222234
MAE:  1.2246
MAE: 1.22463481441963
Content-Based Recommendations for Product 10:
   product_id       name category               description  premium
0           1  Product 1  Fashion  Description of product 1        0
1           2  Product 2  Fashion  Description of product 2        0
2           3  Product 3     Home  Description of product 3        0
3           4  Product 4     Home  Description of product 4        1
4           5  Product 5   Beauty  Description of product 5        0
Collaborative Recommendations for User 5:
     product_id         name     category                 description  premium
185         186  Product 186         Home  Description of product 186        0
272         273  Product 273       Beauty  Description of product 273        0
339         340  Product 340  Electronics  Description of product 340        0
369         370  Product 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommendations['bias_adjustment'] = recommendations['category'].map(category_boost).fillna(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommendations['final_score'] = recommendations['bias_adjustment'] + 1
