In [1]:
# =====================================
# Synthetic Supplier Data Generation
# =====================================

from pathlib import Path

import numpy as np
import pandas as pd

# -----------------------------
# 1. Configuration
# -----------------------------
n_suppliers = 10  # number of synthetic suppliers to generate; increase/decrease as needed

# Single source of truth for the output location (used for both saving and the log message).
OUTPUT_PATH = Path("../data/suppliers.csv")

# -----------------------------
# 2. Generate synthetic data
# -----------------------------
# Seed the legacy global NumPy RNG so every Restart & Run All yields the same table.
# The columns below are drawn in dict order; reordering them changes the generated values.
np.random.seed(42)

data = {
    "SupplierID": [f"S{i+1}" for i in range(n_suppliers)],

    # Cost per unit ($10 to $50), rounded to cents
    "Cost_per_unit": np.round(np.random.uniform(10, 50, n_suppliers), 2),

    # Delivery time in whole days (3 to 15 inclusive; randint's upper bound 16 is exclusive)
    "Delivery_time_days": np.random.randint(3, 16, n_suppliers),

    # Carbon footprint in kg CO2e per unit (5 to 50)
    "Carbon_kgCO2e": np.round(np.random.uniform(5, 50, n_suppliers), 2),

    # Quality rating on a 1-10 scale; synthetic suppliers are sampled from 6 to 10 only
    "QualityRating": np.round(np.random.uniform(6, 10, n_suppliers), 1),

    # Compliance score (70 to 100)
    "ComplianceScore": np.round(np.random.uniform(70, 100, n_suppliers), 1)
}

df_suppliers = pd.DataFrame(data)

# Cheap invariants: one row per supplier and no duplicate IDs.
assert len(df_suppliers) == n_suppliers
assert df_suppliers["SupplierID"].is_unique

# -----------------------------
# 3. Save to CSV
# -----------------------------
# Create the target directory first so the cell also works on a fresh checkout
# where ../data does not exist yet (to_csv does not create missing directories).
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
df_suppliers.to_csv(OUTPUT_PATH, index=False)
# as_posix() keeps the forward-slash form of the path in the message on every OS.
print(f"Synthetic supplier data saved to '{OUTPUT_PATH.as_posix()}'")

# -----------------------------
# 4. Display the data
# -----------------------------
df_suppliers


Synthetic supplier data saved to '../data/suppliers.csv'


Unnamed: 0,SupplierID,Cost_per_unit,Delivery_time_days,Carbon_kgCO2e,QualityRating,ComplianceScore
0,S1,24.98,8,18.69,6.8,79.1
1,S2,48.03,7,28.61,8.1,72.9
2,S3,39.28,4,24.44,8.4,90.5
3,S4,33.95,10,18.11,6.2,83.2
4,S5,16.24,14,32.53,8.4,73.7
5,S6,16.24,8,11.28,6.7,84.9
6,S7,12.32,4,18.15,6.3,71.0
7,S8,44.65,14,21.49,9.8,97.3
8,S9,34.04,7,25.52,9.9,77.8
9,S10,38.32,3,40.33,9.2,89.9
