# In [2]:
import pickle
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Raw user profiles, one row per stored user.
data = pd.read_csv("../datasets/user_profiles.csv")

# Pre-fitted preprocessing artifacts and the pre-computed feature matrix.
# NOTE: pickle.load executes arbitrary code embedded in the file, so these
# dumps must come from a trusted source.
with open("../dumps/encoder.pkl", "rb") as encoder_file:
    encoder = pickle.load(encoder_file)

with open("../dumps/scaler.pkl", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)

# X is indexed by row below; presumably row i of X describes row i of
# `data` -- TODO confirm against the code that produced the dump.
with open("../dumps/combined_data.pkl", "rb") as features_file:
    X = pickle.load(features_file)

# Columns handed to `encoder.transform` in recommend_profiles_for_new_user.
# NOTE(review): the order presumably has to match the order the encoder was
# fitted with -- verify before reordering.
categorical_columns = [
    'gender',
    'occupation',
    'educationLevel',
    'preferredCity',
    'preferredAccommodation',
    'dailySchedule',
    'smokingHabits',
    'drinkingHabits',
    'dietaryPreferences',
    'petPreferences',
    'introvertExtrovert',
    'socialHabits',
    'hobbiesAndInterests',
    'languagePreferences',
    'preferredSecurityMeasures',
    'isLookingFor',
]

# Columns handed to `scaler.transform` in recommend_profiles_for_new_user.
numerical_columns = [
    'age',
    'budgetPriceRange',
    'numberOfRoommates',
]

def recommend_profiles_for_new_user(new_user_data, num_recommendations=3):
    """Return the stored profiles most similar to a new user.

    Only profiles whose ``isLookingFor`` value complements the new user's
    are considered: a user looking for a ``'room'`` is matched against
    ``'roommate'`` profiles, and any other value is matched against
    ``'room'`` profiles. Candidates are ranked by cosine similarity between
    the new user's encoded feature vector and the matching rows of ``X``.

    Uses the module-level ``data``, ``X``, ``encoder``, ``scaler``,
    ``categorical_columns`` and ``numerical_columns``. Row ``i`` of ``X`` is
    assumed to describe row ``i`` of ``data`` -- TODO confirm against the
    code that produced the pickled matrix.

    Args:
        new_user_data: DataFrame containing every column listed in
            ``categorical_columns`` and ``numerical_columns``. It is expected
            to hold a single row; only the first row's ``isLookingFor`` is
            read.
        num_recommendations: Maximum number of profiles to return.

    Returns:
        A DataFrame with rows of ``data``, most similar first. It is empty
        when no complementary profile exists or when
        ``num_recommendations`` is not positive.
    """
    seeking = new_user_data['isLookingFor'].iloc[0]
    complementary = 'roommate' if seeking == 'room' else 'room'

    encoded_cats = encoder.transform(new_user_data[categorical_columns])
    # The encoder may return a sparse matrix or, depending on how it was
    # configured, an already-dense array; np.hstack needs a dense one.
    if hasattr(encoded_cats, "toarray"):
        encoded_cats = encoded_cats.toarray()
    scaled_nums = scaler.transform(new_user_data[numerical_columns])
    new_user_X = np.hstack([encoded_cats, scaled_nums])

    # Work with row *positions*, not index labels: both X[...] and
    # data.iloc[...] are positional, so labels would only be correct while
    # `data` happens to carry a default 0..n-1 index.
    candidate_mask = (data['isLookingFor'] == complementary).to_numpy()
    candidate_positions = np.flatnonzero(candidate_mask)

    # cosine_similarity rejects an empty candidate matrix, and a negative
    # count would slice from the wrong end; both cases mean "no results".
    if candidate_positions.size == 0 or num_recommendations <= 0:
        return data.iloc[[]]

    similarities = cosine_similarity(
        new_user_X, X[candidate_positions]
    ).flatten()
    best_first = np.argsort(similarities)[::-1][:num_recommendations]

    return data.iloc[candidate_positions[best_first]]

# Example query: a single hypothetical user who is looking for a room.
example_profile = {
    'gender': 'Male',
    'occupation': 'Professional',
    'educationLevel': 'Graduate',
    'preferredCity': 'New York',
    'preferredAccommodation': 'Apartment',
    'dailySchedule': 'Early Bird',
    'smokingHabits': 'Non-smoker',
    'drinkingHabits': 'Drinker',
    'dietaryPreferences': 'Non-vegetarian',
    'petPreferences': 'Allergic to Pets',
    'introvertExtrovert': 'Extrovert',
    'socialHabits': 'Quiet',
    'hobbiesAndInterests': 'Reading',
    'languagePreferences': 'English',
    'preferredSecurityMeasures': 'Secure Building',
    'isLookingFor': 'room',
    'age': 28,
    'budgetPriceRange': 1000,
    'numberOfRoommates': 2,
}

# Wrap every value in a one-element list so the frame has exactly one row.
new_user_data = pd.DataFrame(
    {column: [value] for column, value in example_profile.items()}
)

# Ask for the two closest complementary profiles and print their Google IDs.
recommended_profiles = recommend_profiles_for_new_user(
    new_user_data,
    num_recommendations=2,
)
google_id_column = recommended_profiles['googleId']
user_ids = google_id_column.values.tolist()
print(user_ids)


# Output of the cell above: [42343, 35634]
