<a href="https://colab.research.google.com/github/KalErgetu/Hybrid-AI-recommendation-system/blob/main/AI_final_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q lightfm

In [None]:
# Mount Google Drive at /content/drive; the CSV inputs and the pickled model
# files used later in this notebook are read from and written to that mount.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import copy
import itertools
import os
import pickle
import warnings

import numpy as np
import pandas as pd
from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.data import Dataset
from lightfm.evaluation import auc_score, precision_at_k, recall_at_k
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# NOTE(review): this suppresses every warning for the rest of the session, so
# deprecation and numerical warnings raised by later cells will not be shown.
warnings.filterwarnings("ignore")


In [None]:
# Read the four raw CSV tables from the mounted Drive folder

path = '/content/drive/My Drive/AI/'

# One read per table; the unpacking order matches the file order below.
Users, Vendors, Payments, Bookings = (
    pd.read_csv(path + csv_name)
    for csv_name in ('Users.csv', 'Vendors.csv', 'Payments.csv', 'Bookings.csv')
)

# --- Data Cleaning ---
def clean_data(Users, Vendors, Payments, Bookings):
    """Placeholder cleaning step: returns the four tables exactly as received.

    Args:
        Users, Vendors, Payments, Bookings: the raw tables loaded from CSV.

    Returns:
        The same four objects, in the same order, unmodified.
    """
    # TODO: the actual cleaning logic was never filled in here, so this
    # function currently performs no cleaning at all.
    return Users, Vendors, Payments, Bookings

Users, Vendors, Payments, Bookings = clean_data(Users, Vendors, Payments, Bookings)

# --- Feature Enhancement ---
def enhance_features(Users, Vendors):
    """Placeholder feature step: returns both tables exactly as received.

    Args:
        Users, Vendors: the user and vendor tables.

    Returns:
        The same two objects, unmodified; no columns are added here.
    """
    # TODO: the actual feature-enhancement logic was never filled in here.
    # NOTE(review): `enhance_vendor_features` / `enhance_user_features` are
    # defined in a later cell but are not called from this stub — confirm
    # whether they were meant to be wired in.
    return Users, Vendors

Users, Vendors = enhance_features(Users, Vendors)

# Show the first rows of every table so the load can be checked by eye.
print("Data loaded and cleaned successfully:")
for heading, table in (
    ("Users:", Users),
    ("Vendors:", Vendors),
    ("Payments:", Payments),
    ("Booking:", Bookings),
):
    print(heading)
    print(table.head())

Data loaded and cleaned successfully:
Users:
   User_ID              Name                        Email  \
0        1    George English      clinenicholas@gmail.com   
1        2        Amy Rhodes  nicholas65@barajas-rich.biz   
2        3     Sandra Rivers              xray@morgan.net   
3        4  Stephanie Wilson          christy02@gmail.com   
4        5      John Mendoza            klevine@gmail.com   

                   Phone     Location Event_Preference  
0           065.175.4462  Addis Ababa     Chair Rental  
1  +1-259-302-6313x06471  Addis Ababa  Audio Equipment  
2      684.798.9144x4062  Addis Ababa     Wedding Hall  
3           895-640-0921  Addis Ababa  Audio Equipment  
4    +1-210-316-0175x471  Addis Ababa         Lighting  
Vendors:
   Vendor_ID              Business_Name      Category  Annual_Revenue_ETB  \
0          1   Ellis, Lopez and Johnson      Catering           120159.36   
1          2  Park, Christian and Scott      Lighting            85833.21   
2     

In [None]:
# Enhanced feature engineering for vendors
def enhance_vendor_features(Vendors, bookings=None):
    """Add price tier, availability and popularity columns to the vendor table.

    Args:
        Vendors: DataFrame with `Vendor_ID`, `Annual_Revenue_ETB` and `Rating`
            columns. It is modified in place and also returned.
        bookings: DataFrame with a `Vendor_ID` column, one row per booking.
            When omitted, the notebook-level `Bookings` table is used.

    Returns:
        The same `Vendors` frame with `Price_Category`, `Availability_Score`
        and `Popularity_Score` columns added.
    """
    if bookings is None:
        bookings = Bookings

    # Revenue quintiles, labelled from cheapest to most expensive tier.
    Vendors['Price_Category'] = pd.qcut(
        Vendors['Annual_Revenue_ETB'],
        q=5,
        labels=['Budget', 'Economy', 'Mid-Range', 'Premium', 'Luxury'],
    )

    # Availability score: 1 / (1 + number of bookings), so unbooked vendors score 1.
    # The counts are looked up per row with `map`, which keeps the result on the
    # Vendors index. Assigning a Series indexed by Vendor_ID values instead would
    # be aligned against the row labels and attach scores to the wrong vendors.
    booking_frequency = bookings.groupby('Vendor_ID').size()
    bookings_per_vendor = Vendors['Vendor_ID'].map(booking_frequency).fillna(0)
    Vendors['Availability_Score'] = 1 / (1 + bookings_per_vendor)

    # Weighted blend of rating, revenue share and availability.
    # NOTE(review): Rating enters on its raw scale while the other two terms are
    # at most 1, so it dominates the blend — confirm whether it should be normalised.
    Vendors['Popularity_Score'] = (
        0.4 * Vendors['Rating'] +
        0.3 * (Vendors['Annual_Revenue_ETB'] / Vendors['Annual_Revenue_ETB'].max()) +
        0.3 * Vendors['Availability_Score']
    )

    return Vendors

# Enhanced feature engineering for users
def enhance_user_features(Users, Payments, Bookings):
    """Attach per-user spending statistics and a spending segment.

    Args:
        Users: DataFrame with a `User_ID` column.
        Payments: DataFrame with `User_ID` and `Transaction_Amount_ETB`.
        Bookings: DataFrame with `User_ID` and `Amount_Spent_ETB`.

    Returns:
        A new frame (the result of a left merge on `User_ID`) with
        `Avg_Spending`, `Total_Spending`, `Transaction_Count` and
        `User_Segment` added. Users without any payment or booking keep NaN
        in the three numeric columns and are treated as spending 0 when the
        segment is assigned.
    """
    # Put both sources on a common amount column so they can be aggregated together.
    user_interactions = pd.concat([
        Payments[['User_ID', 'Transaction_Amount_ETB']],
        Bookings[['User_ID', 'Amount_Spent_ETB']].rename(
            columns={'Amount_Spent_ETB': 'Transaction_Amount_ETB'}
        ),
    ])

    spending_patterns = (
        user_interactions
        .groupby('User_ID')['Transaction_Amount_ETB']
        .agg(Avg_Spending='mean', Total_Spending='sum', Transaction_Count='count')
        .reset_index()
    )

    Users = Users.merge(spending_patterns, on='User_ID', how='left')

    segment_labels = ['Low', 'Medium', 'High', 'Premium']
    total_spending = Users['Total_Spending'].fillna(0)
    try:
        Users['User_Segment'] = pd.qcut(total_spending, q=4, labels=segment_labels)
    except ValueError:
        # Many identical totals (typically a large share of users at 0) collapse
        # the quartile edges and qcut refuses duplicate edges. Ranking first makes
        # every value distinct, so four equally sized segments can still be cut;
        # tied users are split in row order.
        Users['User_Segment'] = pd.qcut(
            total_spending.rank(method='first'), q=4, labels=segment_labels
        )

    return Users

In [None]:
# Stack payments and bookings into one (user, vendor, value) event table
def _as_interaction_events(frame, amount_column):
    """Keep the two ID columns plus `amount_column`, renamed to `Interaction_Value`."""
    return frame[['User_ID', 'Vendor_ID', amount_column]].rename(
        columns={amount_column: 'Interaction_Value'}
    )

interactions = pd.concat([
    _as_interaction_events(Payments, 'Transaction_Amount_ETB'),
    _as_interaction_events(Bookings, 'Amount_Spent_ETB'),
])

print("\n=== Combined Interactions ===")
print("Merged Payment + Booking Sample:")
print(interactions.head())
print("Shape of interactions_df:", interactions.shape)

# Collapse repeated (user, vendor) pairs by adding their values together
interaction_grouped = interactions.groupby(
    ['User_ID', 'Vendor_ID'], as_index=False
)['Interaction_Value'].sum()

print("\n=== Grouped Interactions ===")
print("Interaction grouped head:")
print(interaction_grouped.head())
print("Shape:", interaction_grouped.shape)
print("Number of unique users:", interaction_grouped['User_ID'].nunique())
print("Number of unique vendors:", interaction_grouped['Vendor_ID'].nunique())

# Map raw IDs onto dense 0-based row / column positions
user_encoder = LabelEncoder()
vendor_encoder = LabelEncoder()
interaction_grouped['user_index'] = user_encoder.fit_transform(interaction_grouped['User_ID'])
interaction_grouped['vendor_index'] = vendor_encoder.fit_transform(interaction_grouped['Vendor_ID'])

# Reverse lookups: matrix position -> original ID
user_id_lookup = dict(zip(interaction_grouped['user_index'], interaction_grouped['User_ID']))
vendor_id_lookup = dict(zip(interaction_grouped['vendor_index'], interaction_grouped['Vendor_ID']))

# Users on the rows, vendors on the columns, summed value in each cell
sparse_user_vendor = csr_matrix((
    interaction_grouped['Interaction_Value'],
    (interaction_grouped['user_index'], interaction_grouped['vendor_index']),
))

print("\n=== Sparse Matrix Info ===")
print("Sparse matrix shape:", sparse_user_vendor.shape)
print("Number of non-zero elements:", sparse_user_vendor.nnz)




=== Combined Interactions ===
Merged Payment + Booking Sample:
   User_ID  Vendor_ID  Interaction_Value
0    64078       2542           32154.78
1    67619       1715           28638.04
2    76812        253           20587.46
3    74231        177            9477.45
4    44354       1525           19083.02
Shape of interactions_df: (400000, 3)

=== Grouped Interactions ===
Interaction grouped head:
   User_ID  Vendor_ID  Interaction_Value
0        1        507           38718.21
1        1       3106           45198.80
2        2        484           40624.08
3        2       2268           12221.97
4        2       2848            5288.61
Shape: (399830, 3)
Number of unique users: 98144
Number of unique vendors: 5000

=== Sparse Matrix Info ===
Sparse matrix shape: (98144, 5000)
Number of non-zero elements: 399830


In [None]:
# Cold Start Recommendation using Content-Based Filtering

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def handle_cold_start_new_user(user_profile, vendors_df, top_n=5):
    """
    Recommend vendors to a new user by TF-IDF text similarity.

    user_profile: dict with optional 'interests' and 'location' strings;
        missing keys and None values are treated as empty text.
    vendors_df: Vendors dataframe with 'vendor_id', 'services', 'category'
        and 'location' columns. It is copied, not modified.
    top_n: maximum number of vendors to return; 0 or less returns no rows.

    Returns a dataframe with the 'vendor_id', 'category', 'location' and
    'services' columns of the best-matching vendors, most similar first.
    """
    result_columns = ['vendor_id', 'category', 'location', 'services']
    vendors = vendors_df.copy()

    # Nothing to rank: return an empty frame with the usual columns instead of
    # failing inside the vectoriser / similarity call.
    if vendors.empty or top_n <= 0:
        return vendors.iloc[0:0][result_columns]

    vendors['features'] = (
        vendors['services'].fillna('') + ' '
        + vendors['category'].fillna('') + ' '
        + vendors['location'].fillna('')
    ).str.lower()
    # `or ''` also covers keys that are present but set to None.
    user_text = (
        (user_profile.get('interests') or '') + ' '
        + (user_profile.get('location') or '')
    ).lower()

    # Fit on vendors and the user together so both share one vocabulary;
    # the user document is the last row.
    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(vendors['features'].tolist() + [user_text])

    user_vec = tfidf_matrix[-1]
    vendor_vecs = tfidf_matrix[:-1]

    scores = cosine_similarity(user_vec, vendor_vecs).flatten()
    # Reverse first, then truncate. Slicing with [-top_n:] before reversing
    # returns the whole array when top_n is 0.
    top_indices = scores.argsort()[::-1][:top_n]
    return vendors.iloc[top_indices][result_columns]

def handle_cold_start_new_vendor(vendor_profile, users_df, top_n=5):
    """
    Recommend users to a new vendor by TF-IDF text similarity.

    vendor_profile: dict with optional 'services', 'category' and 'location'
        strings; missing keys and None values are treated as empty text.
    users_df: Users dataframe with 'user_id', 'interests' and 'location'
        columns. It is copied, not modified.
    top_n: maximum number of users to return; 0 or less returns no rows.

    Returns a dataframe with the 'user_id', 'interests' and 'location'
    columns of the best-matching users, most similar first.
    """
    result_columns = ['user_id', 'interests', 'location']
    users = users_df.copy()

    # Nothing to rank: return an empty frame with the usual columns instead of
    # failing inside the vectoriser / similarity call.
    if users.empty or top_n <= 0:
        return users.iloc[0:0][result_columns]

    users['features'] = (
        users['interests'].fillna('') + ' ' + users['location'].fillna('')
    ).str.lower()
    # `or ''` also covers keys that are present but set to None.
    vendor_text = (
        (vendor_profile.get('services') or '') + ' '
        + (vendor_profile.get('category') or '') + ' '
        + (vendor_profile.get('location') or '')
    ).lower()

    # Fit on users and the vendor together so both share one vocabulary;
    # the vendor document is the last row.
    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(users['features'].tolist() + [vendor_text])

    vendor_vec = tfidf_matrix[-1]
    user_vecs = tfidf_matrix[:-1]

    scores = cosine_similarity(vendor_vec, user_vecs).flatten()
    # Reverse first, then truncate. Slicing with [-top_n:] before reversing
    # returns the whole array when top_n is 0.
    top_indices = scores.argsort()[::-1][:top_n]
    return users.iloc[top_indices][result_columns]



In [None]:
# --- Prepare vendor features for LightFM ---
print("\nPreparing vendor features for LightFM...")

def prepare_vendor_features_lightfm(Vendors):
    """Turn each vendor row into a list of string feature tokens for LightFM.

    Categorical columns ('Category' and 'Region', when present) become
    "<column>_<value>" tokens. Every numeric column is standardised, cut into
    5 equal-width bins and emitted as a "<column>_bin_<k>" token.

    Missing values in the categorical columns are replaced by 'Unknown'
    directly on the `Vendors` frame that is passed in.

    NOTE(review): every numeric column is selected, so a numeric `Vendor_ID`
    is binned as a feature as well — confirm that is intended.

    Returns:
        A list of (vendor_id, [token, ...]) tuples, one per row of `Vendors`.
    """
    # Categorical columns that actually exist in this table
    categorical_features = [
        name for name in ('Category', 'Region') if name in Vendors.columns
    ]
    Vendors[categorical_features] = Vendors[categorical_features].fillna('Unknown')

    # Numeric columns, or a constant stand-in when there are none
    vendor_features = Vendors.select_dtypes(include=[np.number])
    if vendor_features.empty:
        vendor_features = pd.DataFrame({'dummy': np.ones(len(Vendors))}, index=Vendors.index)

    # Standardise and re-index by vendor ID so later lookups are by ID
    vendor_features_scaled = pd.DataFrame(
        StandardScaler().fit_transform(vendor_features),
        index=Vendors['Vendor_ID'],
        columns=vendor_features.columns,
    )

    # One token per numeric column per vendor: "<column>_bin_<bin number>"
    binned_df = pd.DataFrame(
        {
            col: [
                f"{col}_bin_{val}"
                for val in pd.cut(vendor_features_scaled[col], bins=5, labels=False)
            ]
            for col in vendor_features_scaled.columns
        },
        index=vendor_features_scaled.index,
    )

    vendors_indexed = Vendors.set_index('Vendor_ID')

    def _tokens_for(vendor_id):
        """Categorical tokens first, then the binned numeric tokens."""
        tokens = [
            f"{cat_col}_{vendors_indexed.loc[vendor_id, cat_col]}"
            for cat_col in categorical_features
        ]
        tokens.extend(binned_df.loc[vendor_id, col] for col in binned_df.columns)
        return tokens

    return [
        (vendor_id, _tokens_for(vendor_id))
        for vendor_id in vendor_features_scaled.index
    ]

# Usage:
vendor_features_lightfm = prepare_vendor_features_lightfm(Vendors)





Preparing vendor features for LightFM...


In [None]:
# --- Prepare LightFM dataset ---
print("\nPreparing LightFM dataset...")
dataset = Dataset()

# IDs that actually appear in the interaction table, in order of first appearance
unique_users = interaction_grouped['User_ID'].unique()
unique_vendors = interaction_grouped['Vendor_ID'].unique()

# Collect every distinct feature token used by any vendor
all_item_features = set()
for _, features in vendor_features_lightfm:
    all_item_features.update(features)

# The feature names are sorted before fitting. The Dataset numbers features in
# the order it receives them, and the iteration order of a set of strings can
# differ between Python processes, so an unsorted set would give the feature
# matrix a different column layout in every kernel session. That silently
# breaks a model that was pickled in one session and loaded in another.
# NOTE(review): a model cached before this change was trained against an
# arbitrary column order — delete the cached pickle and retrain once.
dataset.fit(
    users=unique_users,
    items=unique_vendors,
    item_features=sorted(all_item_features)
)



Preparing LightFM dataset...


In [None]:
# Build interactions matrix
# itertuples keeps each column's own dtype and avoids building a Series per
# row; iterrows would upcast the integer IDs to float because the value
# column is float.
(interactions_matrix, weights) = dataset.build_interactions(
    interaction_grouped[['User_ID', 'Vendor_ID', 'Interaction_Value']]
    .itertuples(index=False, name=None)
)

# Build item features matrix, restricted to vendors the dataset was fitted on.
# A set makes the membership test constant-time instead of a scan of the array.
vendors_with_interactions = set(unique_vendors)
item_features = dataset.build_item_features(
    [(vendor_id, features)
     for vendor_id, features in vendor_features_lightfm
     if vendor_id in vendors_with_interactions]
)

print("\nMatrix shapes:")
print("Interactions matrix:", interactions_matrix.shape)
print("Item features matrix:", item_features.shape)


Matrix shapes:
Interactions matrix: (98144, 5000)
Item features matrix: (5000, 5022)


In [None]:
# --- Split interactions using built-in LightFM method ---
# random_state is documented as a numpy RandomState; passing a seeded instance
# avoids relying on a bare integer seed being accepted by the installed version.
train_interactions, test_interactions = random_train_test_split(
    interactions_matrix,
    test_percentage=0.2,
    random_state=np.random.RandomState(42)
)

# --- Hyperparameter Tuning Function ---
def tune_lightfm(train, test, item_features, max_epochs=50, patience=5, num_threads=4):
    """Grid-search LightFM hyperparameters with per-configuration early stopping.

    Each configuration is trained one epoch at a time and scored by mean AUC on
    `test`. Training of a configuration stops once the score has not improved
    for `patience` consecutive epochs, or after `max_epochs`.

    NOTE(review): the same `test` matrix is used both to stop training and to
    pick the winning configuration, so the reported score is optimistic —
    consider a separate validation split.

    Args:
        train: training interactions matrix.
        test: held-out interactions matrix used for scoring.
        item_features: item feature matrix passed to fitting and scoring.
        max_epochs: upper bound on epochs per configuration.
        patience: epochs without improvement tolerated before stopping.
        num_threads: threads passed to `fit_partial`.

    Returns:
        (best_model, best_params). `best_model` is a snapshot taken at the
        best-scoring epoch; it is None (and `best_params` is empty) if no
        configuration produced a score.
    """
    param_grid = {
        'loss': ['warp', 'logistic'],
        'no_components': [20, 30, 40],
        'learning_rate': [0.01, 0.05],
        'item_alpha': [1e-6, 1e-5],
        'user_alpha' : [1e-6, 1e-5]
    }

    best_score = -1
    best_params = {}
    best_model = None

    # product() walks the grid in the same order as nested loops would,
    # with the last parameter varying fastest.
    for loss, n_comp, lr, item_alpha, user_alpha in itertools.product(
        param_grid['loss'],
        param_grid['no_components'],
        param_grid['learning_rate'],
        param_grid['item_alpha'],
        param_grid['user_alpha'],
    ):
        print(f"\nTesting config: loss={loss}, n_comp={n_comp}, lr={lr}, item_alpha={item_alpha}, user_alpha={user_alpha}")

        model = LightFM(
            loss=loss,
            no_components=n_comp,
            learning_rate=lr,
            item_alpha=item_alpha,
            user_alpha=user_alpha,
            random_state=42
        )

        best_epoch_score = -1
        no_improvement = 0
        best_epoch_model = None

        for epoch in range(max_epochs):
            model.fit_partial(
                train,
                item_features=item_features,
                epochs=1,
                num_threads=num_threads
            )

            current_auc = auc_score(
                model,
                test,
                item_features=item_features
            ).mean()

            print(f"Epoch {epoch+1}: AUC = {current_auc:.4f}", end='\r')

            if current_auc > best_epoch_score:
                best_epoch_score = current_auc
                no_improvement = 0
                # Snapshot the weights: `model` keeps training in place, so
                # keeping a plain reference would end up pointing at the last
                # epoch rather than the best one.
                best_epoch_model = copy.deepcopy(model)
            else:
                no_improvement += 1
                if no_improvement >= patience:
                    break

        if best_epoch_model is not None and best_epoch_score > best_score:
            best_score = best_epoch_score
            best_params = {
                'loss': loss,
                'no_components': n_comp,
                'learning_rate': lr,
                'item_alpha': item_alpha,
                'user_alpha': user_alpha
            }
            best_model = best_epoch_model
            print(f"\nNew best config! AUC = {best_score:.4f}")

    return best_model, best_params

# --- Main Execution ---
if __name__ == "__main__":

    # Cached artefacts live next to the input data on Google Drive
    model_path = '/content/drive/My Drive/AI/lightfm_model.pkl'
    params_path = '/content/drive/My Drive/AI/lightfm_params.pkl'
    metrics_path = '/content/drive/My Drive/AI/lightfm_metrics.pkl'

    if os.path.exists(model_path) and os.path.exists(params_path):
        print("Loading saved model...")
        # NOTE(review): unpickling runs code embedded in the file; only load
        # pickles that this notebook wrote itself.
        with open(model_path, 'rb') as f:
            model = pickle.load(f)
        with open(params_path, 'rb') as f:
            best_params = pickle.load(f)
        # Metrics are optional in the cache; keep the name defined either way.
        final_metrics = None
        if os.path.exists(metrics_path):
            with open(metrics_path, 'rb') as f:
                final_metrics = pickle.load(f)
    else:
        print("Tuning hyperparameters...")
        model, best_params = tune_lightfm(
            train_interactions,
            test_interactions,
            item_features
        )
        if model is None:
            # Do not cache a missing model: a later run would load None and
            # skip training entirely.
            raise RuntimeError("Hyperparameter search did not produce a model.")

        # Score the selected model on the held-out interactions
        final_metrics = {
            'precision': precision_at_k(
                model, test_interactions, item_features=item_features, k=5
            ).mean(),
            'recall': recall_at_k(
                model, test_interactions, item_features=item_features, k=5
            ).mean(),
            'auc': auc_score(
                model, test_interactions, item_features=item_features
            ).mean(),
        }

        # Persist model, parameters and metrics for later sessions
        with open(model_path, 'wb') as f:
            pickle.dump(model, f)
        with open(params_path, 'wb') as f:
            pickle.dump(best_params, f)
        with open(metrics_path, 'wb') as f:
            pickle.dump(final_metrics, f)

    print("\n=== Best Parameters ===")
    for k, v in best_params.items():
        print(f"{k}: {v}")


Loading saved model...

=== Best Parameters ===
loss: warp
no_components: 30
learning_rate: 0.01
item_alpha: 1e-06


In [None]:
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score
import numpy as np

def robust_evaluate(model, test_interactions, item_features=None, k=5, random_state=None):
    """
    Evaluate a model, falling back to a sampled AUC if the library metrics fail.

    First tries precision@k, recall@k and AUC from lightfm.evaluation. If any
    of them raises, a manual AUC is estimated instead: for every user with
    test positives, the same number of items absent from that user's test row
    is sampled (with replacement), and the share of positive scores that beat
    their paired negative score is reported. Precision and recall are NaN in
    that case.

    Args:
        model: object with a `predict(user_id, item_ids, item_features=...)` method.
        test_interactions: sparse user x item matrix of held-out interactions.
        item_features: item feature matrix forwarded to the model, or None.
        k: cut-off for precision@k and recall@k.
        random_state: None to draw negatives from numpy's global generator,
            an int seed, or a numpy RandomState, for a reproducible fallback.

    Returns:
        dict with 'precision', 'recall' and 'auc'; values are NaN where a
        metric could not be computed.
    """
    metrics = {}

    # Basic evaluation with item features
    try:
        print("\nAttempting basic evaluation (with item features)...")
        metrics['precision'] = precision_at_k(
            model,
            test_interactions,
            item_features=item_features,
            user_features=None,
            k=k
        ).mean()

        metrics['recall'] = recall_at_k(
            model,
            test_interactions,
            item_features=item_features,
            user_features=None,
            k=k
        ).mean()

        metrics['auc'] = auc_score(
            model,
            test_interactions,
            item_features=item_features,
            user_features=None
        ).mean()

        return metrics

    except Exception as e:
        print(f"Basic evaluation failed: {str(e)}")

    # Fallback: Manual AUC calculation
    try:
        print("\nAttempting manual AUC calculation...")
        if random_state is None:
            rng = np.random
        elif isinstance(random_state, np.random.RandomState):
            rng = random_state
        else:
            rng = np.random.RandomState(random_state)

        test_csr = test_interactions.tocsr()
        n_users, n_items = test_csr.shape
        all_items = np.arange(n_items)
        pos_scores = []
        neg_scores = []

        for user_id in range(n_users):
            pos_item_indices = test_csr.getrow(user_id).indices

            if len(pos_item_indices) == 0:
                continue

            # Candidate negatives: items with no entry in this user's test row
            neg_item_indices = np.setdiff1d(all_items, pos_item_indices)

            if len(neg_item_indices) == 0:
                continue

            pos_preds = model.predict(user_id, pos_item_indices, item_features=item_features)
            # One sampled negative per positive so the two lists pair up
            neg_sample = rng.choice(neg_item_indices, size=len(pos_item_indices), replace=True)
            neg_preds = model.predict(user_id, neg_sample, item_features=item_features)

            pos_scores.extend(pos_preds)
            neg_scores.extend(neg_preds)

        pos_scores = np.array(pos_scores)
        neg_scores = np.array(neg_scores)

        if len(pos_scores) > 0 and len(neg_scores) > 0:
            auc = (pos_scores > neg_scores).mean()
        else:
            auc = np.nan

        metrics['auc'] = auc
        metrics['precision'] = np.nan
        metrics['recall'] = np.nan

        return metrics

    except Exception as e:
        print(f"Manual AUC calculation also failed: {str(e)}")
        return {
            'precision': np.nan,
            'recall': np.nan,
            'auc': np.nan
        }
# Score the current model on the held-out interactions and print the three metrics.
results = robust_evaluate(model, test_interactions, item_features=item_features, k=5)
print(
    "\nEvaluation Results:",
    f"Precision@k: {results['precision']}",
    f"Recall@k: {results['recall']}",
    f"AUC: {results['auc']}",
    sep="\n",
)



Attempting basic evaluation (with item features)...

Evaluation Results:
Precision@k: 0.0002724894729908556
Recall@k: 0.0010324323015065639
AUC: 0.5119229555130005


In [None]:
def handle_cold_start_new_user(user_id, top_n=5):
    """Recommend vendors to a user with no interaction history.

    Looks the user up in the notebook-level `Users` table and returns highly
    rated vendors (Rating >= 4.0) from `Vendors` whose `Category` equals the
    user's `Event_Preference`, best first. Vendors are ordered by
    `Popularity_Score` when that column exists, otherwise by `Rating`.

    Falls back to `get_popular_vendors(top_n)` when the user is unknown, when
    no vendor matches, or when an expected column is missing.

    NOTE: this definition has the same name as the profile-based function
    defined in an earlier cell and replaces it once this cell has run.

    Args:
        user_id: value to look up in `Users['User_ID']`.
        top_n: maximum number of vendor IDs to return.

    Returns:
        List of vendor IDs.
    """
    try:
        user_rows = Users[Users['User_ID'] == user_id]
        if user_rows.empty:
            print(f"Error in cold start handling: user {user_id} not found")
            return get_popular_vendors(top_n)

        event_preference = user_rows.iloc[0]['Event_Preference']

        # High-quality vendors in the user's preferred category
        matching_vendors = Vendors[
            (Vendors['Category'] == event_preference) &
            (Vendors['Rating'] >= 4.0)
        ]

        # Popularity_Score only exists if the vendor enhancement step was run;
        # Rating is always available here because it was just used to filter.
        if 'Popularity_Score' in matching_vendors.columns:
            sort_column = 'Popularity_Score'
        else:
            sort_column = 'Rating'
        matching_vendors = matching_vendors.sort_values(sort_column, ascending=False)

        recommended = matching_vendors['Vendor_ID'].head(top_n).tolist()
        if not recommended:
            # An empty answer is not useful to a new user; show popular vendors instead.
            return get_popular_vendors(top_n)
        return recommended

    except KeyError as e:
        # A required column is missing from Users or Vendors.
        print(f"Error in cold start handling: {str(e)}")
        return get_popular_vendors(top_n)

In [None]:
def get_popular_vendors(top_n=5):
    """Return the `top_n` vendor IDs that occur most often in `interaction_grouped`."""
    # value_counts() is already ordered from most to least frequent.
    vendor_counts = interaction_grouped['Vendor_ID'].value_counts()
    return vendor_counts.index[:top_n].tolist()

def get_similar_vendors(vendor_id, top_n=5, features=None):
    """Get content-based similar vendors by cosine similarity of numeric features.

    Args:
        vendor_id: ID of the vendor to compare against.
        top_n: maximum number of similar vendors to return.
        features: optional DataFrame of numeric features indexed by vendor ID.
            When omitted, it is built from the numeric columns of the
            notebook-level `Vendors` table (excluding `Vendor_ID` itself),
            standardised, with missing values set to the column mean.

    Returns:
        List of vendor IDs, most similar first, never containing `vendor_id`.
        Empty if the vendor is unknown or the features cannot be built.
    """
    try:
        if features is None:
            # Built here rather than read from a module-level name: the scaled
            # matrix made while preparing LightFM features is local to that
            # function and is not visible from this one.
            numeric = Vendors.set_index('Vendor_ID').select_dtypes(include=[np.number])
            features = pd.DataFrame(
                StandardScaler().fit_transform(numeric),
                index=numeric.index,
                columns=numeric.columns,
            ).fillna(0.0)  # 0 is the column mean after standardisation

        if vendor_id not in features.index:
            print(f"Vendor {vendor_id} not found in features data.")
            return []

        # Double brackets keep a 2-D row even when the ID is unique
        vendor_vec = features.loc[[vendor_id]].values[:1]
        similarities = cosine_similarity(vendor_vec, features.values).flatten()

        # Drop the query vendor by ID. Skipping the first rank is not enough,
        # because another vendor with identical features can tie for first.
        ranked_ids = features.index[np.argsort(-similarities)]
        similar_vendors = [vid for vid in ranked_ids if vid != vendor_id]
        return similar_vendors[:max(top_n, 0)]

    except (KeyError, ValueError) as e:
        print(f"Error finding similar vendors: {str(e)}")
        return []

In [None]:
# --- Recommendation Functions ---
def hybrid_recommend(user_id, top_n=5, return_scores=False):
    """
    Generate hybrid recommendations for a given user
    Args:
        user_id: The original user ID from your data
        top_n: Number of recommendations to return
        return_scores: If True, returns (vendors, scores) tuple
    Returns:
        List of recommended vendor IDs, or a (vendors, scores) tuple when
        return_scores is True. For users unknown to the fitted dataset the
        vendors come from handle_cold_start_new_user(user_id, top_n) and every
        score is None. If scoring fails, the result is empty, and is still a
        tuple when return_scores is True.
    """
    # mapping() returns (user ids, user features, item ids, item features)
    user_id_map, _, item_id_map, _ = dataset.mapping()

    if user_id not in user_id_map:
        print(f"\nUser {user_id} not found in training data (cold-start user).")

        # Cold start: use the profile-based recommender, which itself falls
        # back to popular vendors.
        recommendations = handle_cold_start_new_user(user_id, top_n)

        if return_scores:
            return recommendations, [None] * len(recommendations)
        return recommendations

    try:
        user_idx = user_id_map[user_id]

        # Score every vendor column for this user
        all_vendor_indices = np.arange(interactions_matrix.shape[1])
        scores = model.predict(
            user_idx,
            all_vendor_indices,
            item_features=item_features
        )

        # Highest scores first, then translate matrix positions back to vendor IDs
        top_indices = np.argsort(-scores)[:top_n]
        index_to_vendor = {idx: vendor for vendor, idx in item_id_map.items()}
        recommended_vendors = [index_to_vendor[idx] for idx in top_indices]
        recommended_scores = scores[top_indices]

        if return_scores:
            return recommended_vendors, recommended_scores
        return recommended_vendors

    except Exception as e:
        print(f"Error generating recommendations: {str(e)}")
        # Keep the return shape the caller asked for so tuple unpacking still works
        return ([], []) if return_scores else []

# --- Example Usage ---
def _vendor_label(vendor_id):
    """Return the vendor's display name when the Vendors table has one, else the raw ID."""
    # The loaded table names this column 'Business_Name'; 'Vendor_Name' is
    # still checked first for tables that use that name.
    for name_column in ('Vendor_Name', 'Business_Name'):
        if name_column in Vendors.columns:
            names = Vendors.loc[Vendors['Vendor_ID'] == vendor_id, name_column]
            if not names.empty:
                return names.iloc[0]
    return vendor_id

if len(interaction_grouped) > 0:
    # Get a sample user that exists in the data
    sample_user = interaction_grouped['User_ID'].iloc[0]

    print(f"\nGenerating recommendations for user {sample_user}...")
    recommendations, scores = hybrid_recommend(sample_user, top_n=5, return_scores=True)

    print("\nTop Recommendations:")
    for vendor, score in zip(recommendations, scores):
        # Cold-start recommendations carry no model score
        score_text = "n/a" if score is None else f"{score:.3f}"
        print(f"- {_vendor_label(vendor)} (score: {score_text})")
    # Show similar vendors to the top recommendation
    if recommendations:
        print(f"\nVendors similar to {recommendations[0]}:")
        similar_vendors = get_similar_vendors(recommendations[0])
        for vendor in similar_vendors:
            print(f"- {_vendor_label(vendor)}")
else:
    print("\nNo interactions found - cannot generate recommendations")


Generating recommendations for user 1...

Top Recommendations:
- 4880 (score: 0.118)
- 4648 (score: 0.115)
- 4301 (score: 0.114)
- 4124 (score: 0.112)
- 4529 (score: 0.109)

Vendors similar to 4880:
Error finding similar vendors: name 'vendor_features_scaled' is not defined


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_metrics(metrics):
    """Draw train and test scores from `metrics` as side-by-side bars.

    Keys containing 'train' supply the train bars and the tick labels (with
    'train_' / 'test_' removed); keys containing 'test' supply the test bars.
    Values are paired by their order in the dict.
    """
    plt.figure(figsize=(12, 6))

    # Split the dict into labels, train scores and test scores in one pass
    metric_names, train_values, test_values = [], [], []
    for key, value in metrics.items():
        if 'train' in key:
            metric_names.append(key.replace('train_', '').replace('test_', ''))
            train_values.append(value)
        if 'test' in key:
            test_values.append(value)

    x = range(len(metric_names))

    # Train bars sit on the integer positions, test bars 0.4 to the right
    plt.bar(x, train_values, width=0.4, label='Train', align='center')
    plt.bar([pos + 0.4 for pos in x], test_values, width=0.4, label='Test', align='center')

    # Ticks go halfway between each train/test pair
    plt.xticks([pos + 0.2 for pos in x], metric_names, rotation=45)
    plt.title('Model Performance Comparison')
    plt.ylabel('Score')
    plt.ylim(0, 1)
    plt.legend()

    # Numeric label just above every bar: train first, then test
    for values, x_offset, text_color in (
        (train_values, -0.1, 'blue'),
        (test_values, 0.3, 'orange'),
    ):
        for pos, score in enumerate(values):
            plt.text(pos + x_offset, score + 0.02, f"{score:.2f}", color=text_color)

    plt.tight_layout()
    plt.show()  # Explicitly show the plot