# Healthcare Provider Recommendation System - Visualization

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Project modules are imported from the directory above this notebook,
# so that directory has to be on the import path.
sys.path.append('..')

# Notebook-wide plotting defaults: default figure size and seaborn's white grid theme.
plt.rcParams['figure.figsize'] = (12, 8)
sns.set_style('whitegrid')

In [None]:
# Read the provider and rating tables from the data folder next to the notebook directory
data_dir = Path('../data')
providers_df, ratings_df = (
    pd.read_csv(data_dir / csv_name)
    for csv_name in ('providers_data.csv', 'ratings_data.csv')
)

# Quick sanity check on how many rows each table contains
print(f"Loaded {len(providers_df)} providers and {len(ratings_df)} ratings")

In [None]:
# Scatter of quality score against cost, one colour per specialty
fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(
    data=providers_df,
    x='quality_score',
    y='cost',
    hue='specialty',
    alpha=0.6,
    s=30,
    ax=ax,
)
ax.set_title('Provider Quality vs Cost by Specialty')
ax.grid(alpha=0.3)  # keep the grid faint so it does not compete with the points
plt.show()

In [None]:
# Apply clustering
from clustering import cluster_providers, get_cluster_stats


def _min_max_normalize(values):
    """Rescale a numeric Series linearly so its minimum maps to 0 and its maximum to 1.

    Args:
        values: numeric pandas Series.

    Returns:
        A Series with the same index as ``values``. When every value is
        identical the range is zero; in that case all (non-missing) entries
        are returned as 0.0 rather than the NaN that 0/0 would produce.
    """
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        # Constant column: dividing by the zero range would fill it with NaN.
        return (values - lowest).astype(float)
    return (values - lowest) / span


# Put quality and cost on a common [0, 1] scale before clustering.
providers_df['quality_norm'] = _min_max_normalize(providers_df['quality_score'])
providers_df['cost_norm'] = _min_max_normalize(providers_df['cost'])

# NOTE(review): `labels` is not written back to providers_df here, yet later
# cells read providers_df['cluster'] -- presumably cluster_providers adds that
# column in place; confirm against clustering.py.
labels = cluster_providers(providers_df, n_clusters=5)
cluster_stats = get_cluster_stats(providers_df)

In [None]:
# Visualize clusters
plt.figure(figsize=(10, 8))

# Take the cluster ids from the data instead of hard-coding their count, so the
# plot stays correct if the number of clusters requested upstream changes.
cluster_ids = sorted(providers_df['cluster'].dropna().unique())
# One colour per cluster, spread evenly over the tab10 colormap.
colors = plt.cm.tab10(np.linspace(0, 1, len(cluster_ids)))

for color, cluster_id in zip(colors, cluster_ids):
    cluster_data = providers_df[providers_df['cluster'] == cluster_id]
    plt.scatter(
        cluster_data['quality_score'],
        cluster_data['cost'],
        color=color,
        alpha=0.5,
        label=f'Cluster {cluster_id}',
        s=30,
    )

plt.title('Provider Clusters: Quality vs Cost')
plt.xlabel('Quality Score')
plt.ylabel('Cost')
plt.legend()
plt.grid(alpha=0.3)
plt.show()

In [None]:
# Generate recommendations
from recommender import HealthcareRecommender

# Point the recommender at the same folder the notebook loaded its CSVs from.
# The previous literal 'data' did not match the '../data' location used above.
# NOTE(review): assumes HealthcareRecommender resolves data_dir relative to the
# working directory -- confirm against recommender.py.
recommender = HealthcareRecommender(data_dir=str(data_dir))

# Use the patient with the most ratings as the example.
patient_id = ratings_df['patient_id'].value_counts().index[0]

# Single source of truth for the list length, used by both the call and the message.
top_n = 10
recommendations = recommender.recommend(patient_id, top_n=top_n)

print(f"Top {top_n} recommendations for patient {patient_id}:")
# Last expression of the cell: displayed as the cell output.
recommendations[['provider_id', 'quality_score', 'cost', 'specialty', 'predicted_rating']]

In [None]:
# Show where the recommended providers sit among all providers
fig, ax = plt.subplots(figsize=(10, 8))

# Background layer: every provider known to the recommender, drawn faintly
all_providers = recommender.providers_df
ax.scatter(
    all_providers['quality_score'],
    all_providers['cost'],
    label='All Providers',
    color='gray',
    s=20,
    alpha=0.2,
)

# Foreground layer: the recommended providers as large red stars
ax.scatter(
    recommendations['quality_score'],
    recommendations['cost'],
    label='Recommendations',
    marker='*',
    color='red',
    s=100,
)

ax.set_title(f'Provider Recommendations for Patient {patient_id}')
ax.set_xlabel('Quality Score')
ax.set_ylabel('Cost')
ax.legend()
ax.grid(alpha=0.3)
plt.show()

In [None]:
# Pareto front visualization
from optimize_pareto import fast_non_dominated_sort
from collaborative_filtering import predict_ratings

# Predicted ratings for the example patient from the recommender's factor matrices
predicted_ratings = predict_ratings(
    recommender.U,
    recommender.sigma,
    recommender.Vt,
    patient_id,
    recommender.ratings_matrix,
)

# Objective table: cost is stored negated, and the plot below flips it back
# to its original sign for display.
# NOTE(review): provider_id is handed to the sorter together with the
# objective columns -- confirm fast_non_dominated_sort ignores it.
objectives = (
    recommender.providers_df[['provider_id', 'quality_norm', 'cost_norm']]
    .copy()
    .assign(
        rating_pred=predicted_ratings,
        cost_norm=lambda frame: -frame['cost_norm'],
    )
)

front_indices = fast_non_dominated_sort(objectives)
pareto_front = objectives.iloc[front_indices].copy()

fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(
    objectives['quality_norm'],
    -objectives['cost_norm'],  # undo the negation for plotting
    label='All Providers',
    color='gray',
    s=20,
    alpha=0.2,
)
ax.scatter(
    pareto_front['quality_norm'],
    -pareto_front['cost_norm'],
    label='Pareto Front',
    color='blue',
    s=40,
    alpha=0.8,
)
ax.set_title('Pareto Front: Quality vs Cost')
ax.set_xlabel('Quality (normalized)')
ax.set_ylabel('Cost (normalized, lower is better)')
ax.legend()
ax.grid(alpha=0.3)
plt.show()