# Vendor Scoring Model

This notebook implements a scoring model to evaluate and rank vendors based on multiple performance metrics.

In [None]:
from pathlib import Path

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Read the pre-cleaned vendor evaluation table. Later cells read the columns
# vendor_name, quality_score, delivery_performance, cost_efficiency and
# responsiveness_score from this frame.
df = pd.read_csv(filepath_or_buffer='../data/processed/cleaned_vendor_data.csv')

In [None]:
# Relative importance of each metric in the overall score.
# Adjust these to reflect business priorities; the weighted score below is
# derived directly from this mapping.
weights = {
    'quality_score': 0.4,
    'delivery_performance': 0.25,
    'cost_efficiency': 0.2,
    'responsiveness_score': 0.15
}

# Weighted sum of the metric columns, accumulated in the order the metrics
# appear in `weights`. A missing value in any metric makes that row's score NaN.
df['weighted_score'] = sum(
    df[metric] * weight for metric, weight in weights.items()
)

In [None]:
# Normalize scores to 0-100 scale (min-max scaling across all evaluation rows).
# Note: the result is relative to the best and worst rows in this dataset,
# not an absolute rating.
min_score = df['weighted_score'].min()
max_score = df['weighted_score'].max()
score_range = max_score - min_score

# If every weighted score is identical (e.g. a single evaluation row), the
# range is 0 and the division below would be 0/0, turning every row into NaN.
# Treat a zero range as 1 so those rows map to 0.0 instead; this is the same
# convention scikit-learn's MinMaxScaler uses for constant features.
# A NaN range (empty or all-missing data) is left alone and still yields NaN.
if score_range == 0:
    score_range = 1.0

df['normalized_score'] = ((df['weighted_score'] - min_score) / score_range) * 100

In [None]:
# Per-vendor summary: mean / spread / number of evaluations for the normalized
# score, plus the mean of each underlying metric. Named aggregation gives the
# flat column names directly, so no separate renaming step is needed.
vendor_summary = df.groupby('vendor_name').agg(
    avg_score=('normalized_score', 'mean'),
    score_std=('normalized_score', 'std'),
    eval_count=('normalized_score', 'count'),
    avg_quality=('quality_score', 'mean'),
    avg_delivery=('delivery_performance', 'mean'),
    avg_cost_efficiency=('cost_efficiency', 'mean'),
    avg_responsiveness=('responsiveness_score', 'mean'),
)

# Round for presentation, then order best-first by average normalized score.
vendor_rankings = vendor_summary.round(2).sort_values('avg_score', ascending=False)
vendor_rankings.head(10)

In [None]:
# Map a numeric score onto a named vendor tier
def categorize_vendor(score):
    """Return the tier label for a score.

    Tiers are checked from highest to lowest; each lower bound is inclusive:
    >= 85 'Premium', >= 70 'Standard', >= 50 'Basic'. Anything else --
    including NaN, which fails every comparison -- is 'Needs Improvement'.
    """
    tiers = (
        (85, 'Premium'),
        (70, 'Standard'),
        (50, 'Basic'),
    )
    for lower_bound, label in tiers:
        if score >= lower_bound:
            return label
    return 'Needs Improvement'

# Label every vendor with its tier, based on the (rounded) average score
vendor_rankings['category'] = vendor_rankings['avg_score'].map(categorize_vendor)

# How many vendors fall into each tier
category_counts = vendor_rankings['category'].value_counts()
print(category_counts)

# Bar chart of the tier distribution, with tiers shown best-to-worst
tier_order = ['Premium', 'Standard', 'Basic', 'Needs Improvement']
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=vendor_rankings.reset_index(), x='category', order=tier_order, ax=ax)
ax.set_title('Vendor Category Distribution')
ax.set_ylabel('Number of Vendors')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Generate detailed vendor report
def generate_vendor_report(vendor_name):
    """Build a summary dict for one vendor from the notebook-level `df`.

    Parameters
    ----------
    vendor_name : value compared with ``df['vendor_name']`` using ``==``.

    Returns
    -------
    dict
        Keys: 'Vendor Name', 'Average Overall Score', 'Average Quality Score',
        'Average Delivery Performance', 'Average Cost Efficiency',
        'Average Responsiveness Score', 'Number of Evaluations',
        'Score Variance', 'Category'. Averages are rounded to 2 decimals.

    Notes
    -----
    * 'Score Variance' holds the *standard deviation* of the normalized score
      (``.std()``), not the variance; the key is kept for compatibility.
    * If no rows match ``vendor_name`` the averages are NaN, the evaluation
      count is 0 and the category is whatever ``categorize_vendor`` returns
      for NaN.
    """
    vendor_data = df[df['vendor_name'] == vendor_name]

    # Round once and reuse: the category must be derived from the same rounded
    # value that is reported (and that the rankings table categorizes on),
    # otherwise a mean such as 84.996 would display as 85.0 yet be labelled
    # with the tier below.
    avg_overall_score = round(vendor_data['normalized_score'].mean(), 2)

    report = {
        'Vendor Name': vendor_name,
        'Average Overall Score': avg_overall_score,
        'Average Quality Score': round(vendor_data['quality_score'].mean(), 2),
        'Average Delivery Performance': round(vendor_data['delivery_performance'].mean(), 2),
        'Average Cost Efficiency': round(vendor_data['cost_efficiency'].mean(), 2),
        'Average Responsiveness Score': round(vendor_data['responsiveness_score'].mean(), 2),
        'Number of Evaluations': len(vendor_data),
        'Score Variance': round(vendor_data['normalized_score'].std(), 2),
        'Category': categorize_vendor(avg_overall_score)
    }

    return report

# Print a report for each of the five best-ranked vendors
# (vendor_rankings is already sorted best-first, so the first five index
# entries are the top five vendor names).
top_vendors = vendor_rankings.index[:5].tolist()
for vendor in top_vendors:
    report = generate_vendor_report(vendor)
    # Header line followed by one "key: value" line per report field
    report_lines = [f"\n--- Vendor Report: {report['Vendor Name']} ---"]
    report_lines.extend(f"{key}: {value}" for key, value in report.items())
    print("\n".join(report_lines))

In [None]:
# Save rankings to CSV
rankings_path = Path('../reports/vendor_rankings.csv')
# to_csv does not create missing directories and raises OSError if ../reports
# is absent (e.g. on a fresh checkout), so make sure it exists first.
rankings_path.parent.mkdir(parents=True, exist_ok=True)
vendor_rankings.to_csv(rankings_path)
print("Vendor rankings saved to ../reports/vendor_rankings.csv")