In [37]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

In [50]:
# Load the data
users = pd.read_csv('data/user-old.csv')
contents = pd.read_csv('data/contents.csv')
views = pd.read_csv('data/content_views.csv')

In [51]:
# Load the data
users.head()

Unnamed: 0,portal_user_id
0,aaa1b_old-199
1,aaa1b-1
2,aaa1b-199
3,aaa1b-201
4,aaa1b-202


In [87]:
contents=contents.drop_duplicates(subset=['global_id', 'entity_id', 'portal_name'])

In [88]:
# Load the data
contents.head()

Unnamed: 0,global_id,entity_id,content_type,content_title,content_topic,portal_name
0,214,214,article,Introduction to Aging and Dementia,Brain Health,caac
1,243,243,article,"Delirium, Depression, and Apathy",Brain Health,caac
2,284,284,article,Communication and Dementia,Brain Health,caac
3,285,285,article,Sexuality and Dementia,Brain Health,caac
4,332,332,article,Latest Research on Exercise and Brain Health,,caac


In [34]:
# Load the data
views.head()

Unnamed: 0,portal_user_id,global_id,entity_id,event_type,num_of_views
0,aaa1b-202,138.0,6,content_view,2
1,aaa1b-202,1161.0,555,content_view,1
2,aaa1b-202,1919.0,914,content_view,2
3,aaa1b-205,138.0,6,content_view,9
4,aaa1b-205,1919.0,914,content_view,4


In [31]:
# Function to extract portal name from portal_user_id
def get_portal_name(user_id):
    return user_id.split('-')[0]

In [16]:
# Function to get recommendations
def get_recommendations(user_id, num_recommendations=5):
    portal_name = get_portal_name(user_id)
    if not portal_name:
        print(f"No views found for user {user_id}. Cannot determine portal_name.")
        return None

    # Filter contents and views by the identified portal_name
    contents_filtered = contents[contents['portal_name'] == portal_name]
    views_filtered = views[views['global_id'].isin(contents_filtered['global_id'])]

    # Merge views with contents to get the necessary content details
    views_filtered = views_filtered.merge(contents_filtered[['global_id', 'content_title']], on='global_id')

    # Create user-content interaction matrix
    user_content_matrix = views_filtered.pivot_table(index='portal_user_id', columns='content_title', values='num_of_views', fill_value=0)

    # Convert the interaction matrix to a sparse matrix
    sparse_user_content_matrix = csr_matrix(user_content_matrix.values)

    # Compute cosine similarity between users
    user_similarity = cosine_similarity(sparse_user_content_matrix)

    # Convert similarity matrix to DataFrame for easier manipulation
    user_similarity_df = pd.DataFrame(user_similarity, index=user_content_matrix.index, columns=user_content_matrix.index)

    # Find similar users
    similar_users = user_similarity_df[user_id].sort_values(ascending=False).index[1:]  # Exclude the user itself

    # Get the content viewed by similar users
    similar_users_views = user_content_matrix.loc[similar_users]

    # Sum the views across similar users
    similar_users_views_sum = similar_users_views.sum(axis=0)

    # Remove contents the target user has already viewed
    user_views = user_content_matrix.loc[user_id]
    similar_users_views_sum = similar_users_views_sum[user_views == 0]

    # Recommend the top N contents
    recommendations = similar_users_views_sum.sort_values(ascending=False).head(num_recommendations)

    # Merge with the contents dataframe to get additional details
    recommended_contents = contents_filtered[contents_filtered['content_title'].isin(recommendations.index)]

    # Return the recommendations with additional details
    return recommended_contents[['global_id', 'entity_id', 'content_type', 'content_title', 'content_topic', 'portal_name']]

In [42]:
# Example: Get recommendations for a specific user
get_recommendations('kansas-283')

Unnamed: 0,global_id,entity_id,content_type,content_title,content_topic,portal_name
1796,349,349,article,"Teepa Snow: Normal, Not Normal",Featured Content,kansas
2833,375,369,elearn,Emergency Planning Workbook,,kansas
3626,375,369,elearn,Emergency Planning Workbook,Featured Content,kansas
8005,1186,528,article,5 Skills For Mobility & Fall Prevention,,kansas
8222,1182,524,article,Our #1 Tip For Preventing Burnout,,kansas
11422,988,403,video,Communication Q&A with Teepa Snow,,kansas
19673,1186,528,article,5 Skills For Mobility & Fall Prevention,Mobility & Fall Prevention,kansas
22553,1182,524,article,Our #1 Tip For Preventing Burnout,Caregiver Wellness,kansas
25140,988,403,video,Communication Q&A with Teepa Snow,Communication Changes,kansas


# Added age_range, gender, tags in the user data

In [57]:
import json

In [52]:
users = pd.read_csv('data/user.csv')

In [54]:
users.head()

Unnamed: 0,portal_user_id,age_range,gender,tags
0,coaaa-190,60+,female,"[""Activities"", ""Aggression"", ""Agitation"", ""Anx..."
1,wisconsin-1266,46-60,female,"[""Bathing"", ""Fraud & Scams"", ""Self-Care""]"
2,sarpc-184,60+,female,"[""Self-Care""]"
3,wisconsin-50,18-25,male,"[""Abuse"", ""Aging"", ""Dementia & Alzheimer's"", ""..."
4,wisconsin-93,36-45,female,"[""Activities"", ""Future Planning"", ""Moving & Tr..."


In [55]:
# Function to preprocess tags
def preprocess_tags(tag_string):
    return json.loads(tag_string)

In [58]:
# Preprocess user tags
users['tags'] = users['tags'].apply(preprocess_tags)

In [59]:
# Create a user attributes DataFrame
user_attributes = users.set_index('portal_user_id')

In [61]:
user_attributes.head()

Unnamed: 0_level_0,age_range,gender,tags
portal_user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
coaaa-190,60+,female,"[Activities, Aggression, Agitation, Anxiety, B..."
wisconsin-1266,46-60,female,"[Bathing, Fraud & Scams, Self-Care]"
sarpc-184,60+,female,[Self-Care]
wisconsin-50,18-25,male,"[Abuse, Aging, Dementia & Alzheimer's, Diabete..."
wisconsin-93,36-45,female,"[Activities, Future Planning, Moving & Transfe..."


In [62]:
# Function to compute tag similarity
def compute_tag_similarity(user_tags, other_tags):
    return len(set(user_tags).intersection(set(other_tags))) / len(set(user_tags).union(set(other_tags)))

In [63]:
# Function to compute user similarity based on attributes
def compute_user_similarity(target_user_id, user_attributes):
    target_user = user_attributes.loc[target_user_id]
    similarities = {}

    for user_id, attributes in user_attributes.iterrows():
        if user_id == target_user_id:
            continue

        age_similarity = 1 if target_user['age_range'] == attributes['age_range'] else 0
        gender_similarity = 1 if target_user['gender'] == attributes['gender'] else 0
        tag_similarity = compute_tag_similarity(target_user['tags'], attributes['tags'])

        # Weighted sum of similarities (adjust weights as necessary)
        overall_similarity = 0.3 * age_similarity + 0.3 * gender_similarity + 0.4 * tag_similarity
        similarities[user_id] = overall_similarity

    return pd.Series(similarities)

In [85]:
# Function to get recommendations
def get_recommendations(user_id, num_recommendations=5):
    portal_name = get_portal_name(user_id)

    if not portal_name:
        print(f"No views found for user {user_id}. Cannot determine portal_name.")
        return None
    
    # Filter contents and views by the identified portal_name
    contents_filtered = contents[contents['portal_name'] == portal_name]
    views_filtered = views[views['global_id'].isin(contents_filtered['global_id'])]

    # Merge views with contents to get the necessary content details
    views_filtered = views_filtered.merge(contents_filtered[['global_id', 'content_title']], on='global_id')

    # Create user-content interaction matrix
    user_content_matrix = views_filtered.pivot_table(index='portal_user_id', columns='content_title', values='num_of_views', fill_value=0)

    # Convert the interaction matrix to a sparse matrix
    sparse_user_content_matrix = csr_matrix(user_content_matrix.values)

    # Compute cosine similarity between users based on content views
    user_similarity = cosine_similarity(sparse_user_content_matrix)

    # Convert similarity matrix to DataFrame for easier manipulation
    user_similarity_df = pd.DataFrame(user_similarity, index=user_content_matrix.index, columns=user_content_matrix.index)

     # Check if the user exists in the similarity matrix
    if user_id not in user_similarity_df.index:
        print(f"User {user_id} not found in similarity matrix.")
        return None

    # Compute user similarity based on attributes
    user_attr_similarity = compute_user_similarity(user_id, user_attributes)

    # Combine similarities
    combined_similarity = 0.5 * user_similarity_df.loc[user_id] + 0.5 * user_attr_similarity
    combined_similarity = combined_similarity.dropna().sort_values(ascending=False)

    # Find similar users
    similar_users = combined_similarity.index[:10]  # Take top 10 similar users for example

    # Get the content viewed by similar users
    similar_users_views = user_content_matrix.loc[similar_users]

    # Sum the views across similar users
    similar_users_views_sum = similar_users_views.sum(axis=0)

    # Remove contents the target user has already viewed
    if user_id in user_content_matrix.index:
        user_views = user_content_matrix.loc[user_id]
        similar_users_views_sum = similar_users_views_sum[user_views == 0]

    # Recommend the top N contents
    recommendations = similar_users_views_sum.sort_values(ascending=False).head(num_recommendations)

    # Merge with the contents dataframe to get additional details
    recommended_contents = contents_filtered[contents_filtered['content_title'].isin(recommendations.index)]

    # Return the recommendations with additional details
    return recommended_contents[['global_id', 'entity_id', 'content_type', 'content_title', 'content_topic', 'portal_name']]

In [89]:
# Example: Get recommendations for a specific user
user_id = 'aaa1b-207'
get_recommendations(user_id)

Unnamed: 0,global_id,entity_id,content_type,content_title,content_topic,portal_name
24327,375,369,elearn,Emergency Planning Workbook,,aaa1b
25064,1534,773,article,How To Recover From Burnout: A Guide For Careg...,,aaa1b
26403,322,322,article,Reframing Negative Thoughts,Caregiver Wellness,aaa1b
26947,986,423,elearn,Trualta Companion Cards,,aaa1b
32097,1214,564,article,Missing Doses & Medication Mistakes,Medications,aaa1b
