In [None]:

import pandas as pd
import numpy as np
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the sample data file into a DataFrame.
# The path is relative, so it resolves against the kernel's working directory.
file_path = "sample_user_profiles.json"
df = pd.read_json(path_or_buf=file_path)


In [None]:

# Convert user expertise into a text format for vectorization
def preprocess_expertise(row):
    """Flatten a profile's expertise lists into one space-separated string.

    Reads ``row['resume']['expertise']`` and concatenates, in this order, the
    lists stored under ``assetClasses``, ``products``, ``sectors`` and
    ``regions``.

    Args:
        row: A mapping-like record (e.g. a DataFrame row or a dict) that has a
            ``'resume'`` entry.

    Returns:
        str: The list items joined by single spaces. An empty string is
        returned when ``resume`` is not a dict, when it has no ``expertise``
        dict, or when every expertise list is missing, null or empty.

    Raises:
        KeyError: If ``row`` has no ``'resume'`` entry at all.
        TypeError: If a present expertise field is neither a list nor null,
            or if a list holds non-string items.
    """
    resume = row['resume']
    # A profile without a resume is not a dict here (e.g. None or NaN);
    # treat it as having no expertise instead of failing on subscripting.
    expertise = resume.get('expertise') if isinstance(resume, dict) else None
    if not isinstance(expertise, dict):
        return ""

    tokens = []
    for field in ('assetClasses', 'products', 'sectors', 'regions'):
        # `.get(field, [])` would only cover an absent key; `or []` also covers
        # an explicit null, which would otherwise break list concatenation.
        # `tokens + ...` (not `+=`) keeps raising TypeError for a non-list value
        # rather than silently splitting a string into characters.
        tokens = tokens + (expertise.get(field) or [])
    return " ".join(tokens)

# Build one expertise string per profile: axis='columns' hands each row to the function.
df['expertise_text'] = df.apply(preprocess_expertise, axis='columns')


In [None]:

# Convert liked tags into a text format.
# A profile with no tags (None, or the NaN float pandas stores for a missing
# value) becomes an empty string; previously str.join raised TypeError on it.
# Every other value is joined exactly as before.
df['tags_liked_text'] = df['tagsLiked'].apply(
    lambda tags: "" if tags is None or isinstance(tags, float) else " ".join(tags)
)

# Combine all textual features for recommendation
df['profile_text'] = df['expertise_text'] + " " + df['tags_liked_text']


In [None]:

# Fit a TF-IDF vocabulary on the combined profile text and vectorize every profile
vectorizer = TfidfVectorizer()
profile_matrix = vectorizer.fit_transform(
    df['profile_text']
)

# Pairwise cosine similarity between all profile vectors
similarity_matrix = cosine_similarity(
    profile_matrix
)

# Persist the fitted vectorizer, the similarity scores and the DataFrame as one tuple
joblib.dump(
    (vectorizer, similarity_matrix, df),
    "user_recommendation_model.pkl",
)

print("Model saved successfully!")
