In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors

# Load clustered dataset
df_clustered = pd.read_csv("Clustered_Exhibition_Data.csv")

# Features to encode
features = ['Nationality', 'Gender', 'ExhibitionRole']

# Encode categorical features
label_encoders = {}
for col in features:
    le = LabelEncoder()
    df_clustered[col] = le.fit_transform(df_clustered[col])
    label_encoders[col] = le

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df_clustered[features])

# Apply K-Means clustering
k = 6
kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
df_clustered["Cluster"] = kmeans.fit_predict(X_scaled)

# Train KNN within each cluster
knn_models = {}
for cluster_id in range(k):
    cluster_data = df_clustered[df_clustered["Cluster"] == cluster_id][features]
    if not cluster_data.empty:
        knn = NearestNeighbors(n_neighbors=5, metric="euclidean")
        knn.fit(cluster_data)
        knn_models[cluster_id] = knn

# Recommendation function
def recommend_exhibitions(user_input):
    user_data = pd.DataFrame([user_input], columns=features)

    # Encode user input
    for col in features:
        if user_data[col][0] not in label_encoders[col].classes_:
            user_data[col] = label_encoders[col].classes_[0]
        user_data[col] = label_encoders[col].transform(user_data[col])

    # Scale user input
    user_scaled = scaler.transform(user_data)

    # Convert back to DataFrame (fixes NearestNeighbors warning)
    user_scaled_df = pd.DataFrame(user_scaled, columns=features)

    # Assign user to a cluster
    user_cluster = kmeans.predict(user_scaled)[0]

    if user_cluster in knn_models:
        knn = knn_models[user_cluster]
        distances, indices = knn.kneighbors(user_scaled_df)

        recommended_exhibitions = df_clustered.iloc[indices[0]].copy()

        # Convert encoded values back to original categories
        for col in features:
            recommended_exhibitions[col] = label_encoders[col].inverse_transform(recommended_exhibitions[col])

        # Prioritize exact role matches
        exact_match = recommended_exhibitions[recommended_exhibitions["ExhibitionRole"] == user_input[2]]

        return exact_match if not exact_match.empty else recommended_exhibitions

    else:
        return "No recommendations found."

# Test users
test_users = [
    ["American", "Female", "Curator"],
    ["European", "Female", "Artist"],
    ["Asian", "Male", "Painter"]
]

# Get recommendations
for test_user in test_users:
    print(f"\nRecommended Exhibitions for User {test_user}:")
    print(recommend_exhibitions(test_user))



Recommended Exhibitions for User ['American', 'Female', 'Curator']:
     ExhibitionID ExhibitionNumber  \
232        2606.0               12   
100        2944.0               5a   
182        2729.0                9   
185        2729.0                9   
168        2729.0                9   

                                       ExhibitionTitle ExhibitionBeginDate  \
232  Memorial Exhibition: The Collection of the Lat...          1931-05-17   
100    46 Painters and Sculptors under 35 Years of Age          1930-11-04   
182         Painting and Sculpture by Living Americans          1930-03-12   
185         Painting and Sculpture by Living Americans          1930-03-12   
168         Painting and Sculpture by Living Americans          1930-03-12   

    ExhibitionEndDate                       ExhibitionURL ExhibitionRole  \
232        1931-06-10  moma.org/calendar/exhibitions/1707         Artist   
100        1930-04-27  moma.org/calendar/exhibitions/2025         Artist   
182  