In [10]:
import pickle
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder, StandardScaler

def _load_pickle(path):
    """Unpickle and return the object stored in the file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Profile table read from CSV
data = pd.read_csv("user_profiles.csv")

# Previously dumped artifacts: the encoder, the scaler, and the combined data matrix
encoder = _load_pickle("./dumps/encoder.pkl")
scaler = _load_pickle("./dumps/scaler.pkl")
X = _load_pickle("./dumps/combined_data.pkl")

# Column groups used when transforming a new user's profile:
# the categorical names are handed to the encoder, the numerical names to the scaler.
# NOTE(review): presumably the names/order must match what the pickled
# encoder and scaler were fitted on — confirm against the training script.
categorical_columns = [
    'Gender',
    'Occupation',
    'Education_Level',
    'Preferred_City',
    'Proximity_to_Workplace/College',
    'Preferred_Accommodation',
    'Daily_Schedule',
    'Smoking_Habits',
    'Drinking_Habits',
    'Dietary_Preferences',
    'Pet_Preferences',
    'Introvert/Extrovert',
    'Cleanliness_Level',
    'Social_Habits',
    'Hobbies_and_Interests',
    'Language_Preferences',
    'Preferred_Security_Measures',
    'Past_Experiences_with_Roommates',
]
numerical_columns = [
    'Age',
    'Budget/Price_Range',
    'Number_of_Roommates',
]

# Function to recommend profiles for a new user
def recommend_profiles_for_new_user(new_user_data, num_recommendations=3):
    """Return the stored profiles most similar to a new user's profile.

    The new user's categorical columns are transformed with the module-level
    ``encoder`` and the numerical columns with the module-level ``scaler``.
    The two results are stacked side by side into one feature row, which is
    compared against every row of the module-level matrix ``X`` using cosine
    similarity.

    Args:
        new_user_data: Single-row DataFrame containing every column listed in
            ``categorical_columns`` and ``numerical_columns``.
        num_recommendations: Maximum number of profiles to return. Must be
            non-negative; ``0`` yields an empty result.

    Returns:
        The rows of ``data`` at the positions of the highest similarity
        scores, most similar first.
        NOTE(review): this assumes row ``i`` of ``X`` describes row ``i`` of
        ``data`` — confirm against the code that produced the pickle.

    Raises:
        ValueError: If ``num_recommendations`` is negative, or if
            ``new_user_data`` does not contain exactly one row.
    """
    # A negative count would silently slice "[:n]" as "all but the last |n|".
    if num_recommendations < 0:
        raise ValueError(
            f"num_recommendations must be non-negative, got {num_recommendations}"
        )
    # With several rows the flattened similarity vector would mix users and
    # its argsort positions would no longer be valid row positions in `data`.
    if len(new_user_data) != 1:
        raise ValueError(
            f"new_user_data must contain exactly one row, got {len(new_user_data)}"
        )

    # Transform the new user's data
    new_user_cats = encoder.transform(new_user_data[categorical_columns])
    # Encoders may emit either a sparse matrix or a dense array depending on
    # how they were configured; only densify when there is something to densify.
    if hasattr(new_user_cats, "toarray"):
        new_user_cats = new_user_cats.toarray()
    new_user_nums = scaler.transform(new_user_data[numerical_columns])
    new_user_X = np.hstack([new_user_cats, new_user_nums])

    # Compute similarities: one score per row of X
    new_user_similarities = cosine_similarity(new_user_X, X).flatten()
    # argsort is ascending, so reverse it before taking the top entries
    similar_user_indices = np.argsort(new_user_similarities)[::-1][:num_recommendations]

    # Return recommended profiles
    return data.iloc[similar_user_indices]

# Example new user: a one-row DataFrame built from a single profile record
new_user_data = pd.DataFrame([
    {
        'Gender': 'Male',
        'Occupation': 'Professional',
        'Education_Level': 'Graduate',
        'Preferred_City': 'New York',
        'Proximity_to_Workplace/College': 'Yes',
        'Preferred_Accommodation': 'Apartment',
        'Daily_Schedule': 'Early Bird',
        'Smoking_Habits': 'Non-smoker',
        'Drinking_Habits': 'Drinker',
        'Dietary_Preferences': 'Non-vegetarian',
        'Pet_Preferences': 'Allergic to Pets',
        'Introvert/Extrovert': 'Extrovert',
        'Cleanliness_Level': 'Neat',
        'Social_Habits': 'Quiet',
        'Hobbies_and_Interests': 'Reading',
        'Language_Preferences': 'English',
        'Preferred_Security_Measures': 'Secure Building',
        'Past_Experiences_with_Roommates': 'Good',
        'Age': 28,
        'Budget/Price_Range': 1000,
        'Number_of_Roommates': 2,
    },
])

# Request the two closest matches for the example user and print their
# User_ID values as a plain Python list
recommended_profiles = recommend_profiles_for_new_user(
    new_user_data,
    num_recommendations=2,
)
user_ids = recommended_profiles['User_ID'].values.tolist()
print(user_ids)


[53456 86341]
[53456, 86341]
