In [1]:
!pip install tensorflow
import tensorflow as tf
import tensorflow_hub as hub
embed = hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder/4", trainable=False)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import firebase_admin
from firebase_admin import credentials, firestore
import pandas as pd
import numpy as np

In [3]:
# Connect to firebase
cred = credentials.Certificate("/content/serviceAccountKey.json")
firebase_admin.initialize_app(cred)
db = firestore.client()

In [4]:
# Load data
def load_data():
    users = db.collection('users').get()

    user_data = []
    for doc in users:
        data = doc.to_dict()
        user_data.append(data)

    profile_data = []
    for doc in users:
        data = doc.to_dict()
        profile_data.append(data['profile'])

    return user_data, profile_data

In [5]:
# Process data so it can be use
def process_data(user_data, profile_data):
    user_data = pd.DataFrame(user_data, columns=['uid'])
    profile_data = pd.DataFrame(profile_data, columns=['displayName','skills', 'interests'])
    merge_data = pd.merge(user_data, profile_data, left_index=True, right_index=True)

    result_data = merge_data[['uid', 'displayName', 'skills', 'interests']]
    result_data['skills'] = result_data['skills'].apply(lambda skill_list: ', '.join([skill_dict['name'] for skill_dict in skill_list if skill_dict and 'uid' in skill_dict]) if isinstance(skill_list, list) else 'No Skill')
    result_data['interests'] = result_data['interests'].apply(lambda interest_list: ', '.join([interest_dict['name'] for interest_dict in interest_list if interest_dict and 'uid' in interest_dict]) if isinstance(interest_list, list) else 'No Interest')

    user_data = pd.DataFrame(result_data)

    return user_data

In [6]:
# Define a function to generate user stories
def generate_user_stories(user_data):
    user_story = []
    for index, row in user_data.iterrows():
        user_story.append({
            "uid": row['uid'],
            "story": f"I have Skill {row['skills']}, and I'm Interested in {row['interests']}"
        })
    return user_story

In [16]:
# Define a function to find the top N most similar users to a given user
def find_top_similar_users(current_user_uid, user_data, user_story, embed, n):
    # Check if current user not found
    if user_data.loc[user_data['uid'] == current_user_uid].empty:
        return "Current user not found!"

    # Get the current user's data and story
    current_user = user_data.loc[user_data['uid'] == current_user_uid]
    current_user_story = f"I have Skill {current_user['skills'].values.item()} , and I'm Interested in {current_user['interests'].values.item()}"

    # Encode the current user story into a vector
    current_user_vector = embed([current_user_story])

    # Encode all other user stories into vectors and store them in a matrix along with the user uid
    other_user_vectors = []
    other_user_uid = []
    for user in user_story:
        vector = embed([user["story"]])
        other_user_vectors.append(vector)
        other_user_uid.append(user["uid"])
    other_user_matrix = np.array(other_user_vectors)

    # Calculate the similarity scores between the current user vector and all other user vectors in the matrix
    similarity_scores = tf.matmul(other_user_matrix, tf.transpose(current_user_vector))

    # Get the top N most similar users and their scores
    most_similar_users = np.argsort(similarity_scores.numpy().reshape(-1))[::-1][:n]
    most_similar_user_uid = [other_user_uid[i] for i in most_similar_users]
    most_similar_user_scores = similarity_scores.numpy().reshape(-1)[most_similar_users]

    # Convert the similarity scores to float64
    most_similar_user_scores = most_similar_user_scores.astype(np.float64)

    # Create a list of dictionaries containing the user ID and similarity score for each of the top N most similar users
    similar_users = []
    for i in range(1, n):
        similar_user = {"uid": most_similar_user_uid[i], "similarity_score": most_similar_user_scores[i]}
        similar_users.append(similar_user)

    return similar_users

In [17]:
# Load data
user_data, profile_data = load_data()

# Process data
user_data = process_data(user_data, profile_data)

# Generate user story
user_story = generate_user_stories(user_data)

user_data.loc[user_data['uid'] == '06yJpLuZ79Dbzyky0TQL']

Unnamed: 0,uid,displayName,skills,interests
0,06yJpLuZ79Dbzyky0TQL,Daren Lardez,"Go, Usability Testing, IBM Cloud, React Native...",DevOps Engineer


In [20]:
# Find the top N most similar users
find_top_similar_users('06yJpLuZ79Dbzyky0TQL', user_data, user_story, embed, 10)

[{'uid': 'DlmikRZzUud87ZVHWPBv', 'similarity_score': 0.8476112484931946},
 {'uid': 'GYWVAv9MsocnhgVqZHfn', 'similarity_score': 0.7643425464630127},
 {'uid': 'yNY9IhalltJZ02tQU0ix', 'similarity_score': 0.7564221024513245},
 {'uid': 'J5GkwKqxI217nMLNdY1B', 'similarity_score': 0.755081057548523},
 {'uid': 'OtatlU0hDcex4Cn5dRKN', 'similarity_score': 0.7529898285865784},
 {'uid': 'vbxf5J04EfvCznIapW3H', 'similarity_score': 0.7501305937767029},
 {'uid': 'AvufTNycNTN9p5aicfjv', 'similarity_score': 0.7341395616531372},
 {'uid': '6mDvGWpmuiCzNoWqkLfP', 'similarity_score': 0.734026312828064},
 {'uid': 'Gt6OGPcCTzzz2waskmn2', 'similarity_score': 0.7308772802352905}]