In [3]:
import pandas as pd
import numpy as np
import random
from faker import Faker

# Seed every random source this notebook draws from so that a fresh
# "Restart & Run All" regenerates the same data:
#   - stdlib `random`  (random.randint is used for date offsets)
#   - NumPy            (np.random.choice / uniform / randint)
#   - Faker            (company names, countries, words, sentences)
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
Faker.seed(SEED)  # class-level call: seeds the generator shared by all Faker instances

fake = Faker()



In [2]:
!pip install faker


Collecting faker
  Downloading faker-37.3.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.3.0-py3-none-any.whl (1.9 MB)
   ---------------------------------------- 0.0/1.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.9 MB ? eta -:--:--
   - -------------------------------------- 0.1/1.9 MB 825.8 kB/s eta 0:00:03
   ----- ---------------------------------- 0.3/1.9 MB 2.0 MB/s eta 0:00:01
   ----------- ---------------------------- 0.6/1.9 MB 3.5 MB/s eta 0:00:01
   -------------------------- ------------- 1.3/1.9 MB 6.3 MB/s eta 0:00:01
   ---------------------------------------- 1.9/1.9 MB 7.7 MB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-37.3.0


In [4]:
# Vocabularies used when sampling supplier attributes.
regions = ['Asia', 'Europe', 'North America', 'South America', 'Africa']
risk_levels = ['Low', 'Medium', 'High']

# Build the 100-row supplier master table column by column. The columns are
# generated in the same order in which they appear in the frame, so the Faker
# and NumPy generators are consumed in an unchanged sequence.
# Note: Region and Country are sampled independently of each other, so a row's
# country is not guaranteed to lie in its region.
supplier_ids = [f"SUP{n:03d}" for n in range(1, 101)]
supplier_names = [fake.company() for _ in range(100)]
supplier_regions = np.random.choice(regions, 100)
supplier_countries = [fake.country() for _ in range(100)]
# Uniform draw on [0.6, 0.99), rounded to two decimals.
supplier_reliability = np.round(np.random.uniform(0.6, 0.99, 100), 2)
# Risk rating weighted 40% Low / 40% Medium / 20% High.
supplier_risk = np.random.choice(risk_levels, 100, p=[0.4, 0.4, 0.2])

suppliers = pd.DataFrame({
    'SupplierID': supplier_ids,
    'Name': supplier_names,
    'Region': supplier_regions,
    'Country': supplier_countries,
    'ReliabilityScore': supplier_reliability,
    'RiskRating': supplier_risk,
})


In [5]:
# Product categories to sample from.
categories = ['Raw Material', 'Component', 'Packaging', 'Chemical']

# 1000 products: sequential IDs (PRD0001..PRD1000), a two-word capitalised
# name from Faker, a random category and a random existing supplier.
product_ids = [f"PRD{n:04d}" for n in range(1, 1001)]
product_names = [
    f"{fake.word().capitalize()} {fake.word().capitalize()}"
    for _ in range(1000)
]
product_categories = np.random.choice(categories, 1000)
product_suppliers = np.random.choice(suppliers['SupplierID'], 1000)

products = pd.DataFrame({
    'ProductID': product_ids,
    'Name': product_names,
    'Category': product_categories,
    'SupplierID': product_suppliers,
})


In [6]:
# First possible order date; all generated dates are offsets from this day.
base_date = pd.to_datetime("2024-01-01")
N_SHIPMENTS = 10000

# Order dates are drawn with NumPy (seeded at the top of the notebook) instead
# of the stdlib `random` module, so they are reproducible, and in one
# vectorised call instead of a Python-level loop.
# np.random.randint excludes the upper bound, so (0, 181) yields offsets of
# 0..180 days, the same inclusive range as random.randint(0, 180).
shipments = pd.DataFrame({
    'ShipmentID': [f"SHP{str(i).zfill(5)}" for i in range(1, N_SHIPMENTS + 1)],
    'SupplierID': np.random.choice(suppliers['SupplierID'], N_SHIPMENTS),
    'OrderDate': base_date + pd.to_timedelta(
        np.random.randint(0, 181, size=N_SHIPMENTS), unit='D'
    ),
})

# Promised lead time: 7..14 days after the order date.
shipments['ExpectedDelivery'] = shipments['OrderDate'] + pd.to_timedelta(
    np.random.randint(7, 15, size=N_SHIPMENTS), unit='D'
)
# Actual arrival deviates from the promise by -2..+9 days.
shipments['ActualDelivery'] = shipments['ExpectedDelivery'] + pd.to_timedelta(
    np.random.randint(-2, 10, size=N_SHIPMENTS), unit='D'
)
# Anything arriving strictly after the expected date is 'Delayed';
# on-time and early arrivals are 'Delivered'.
shipments['Status'] = np.where(
    shipments['ActualDelivery'] > shipments['ExpectedDelivery'],
    'Delayed',
    'Delivered',
)


In [7]:
# Kinds of disruption events to sample from.
event_types = ['Flood', 'Strike', 'Cyberattack', 'Earthquake', 'Political Conflict']
N_DISRUPTIONS = 100

# Start dates are drawn with NumPy (seeded at the top of the notebook) instead
# of the stdlib `random` module so that they are reproducible.
# np.random.randint excludes the upper bound, so (0, 181) yields offsets of
# 0..180 days from base_date, the same range as random.randint(0, 180).
disruptions = pd.DataFrame({
    'EventID': [f"EVT{str(i).zfill(3)}" for i in range(1, N_DISRUPTIONS + 1)],
    'Region': np.random.choice(regions, N_DISRUPTIONS),
    'Type': np.random.choice(event_types, N_DISRUPTIONS),
    'StartDate': base_date + pd.to_timedelta(
        np.random.randint(0, 181, size=N_DISRUPTIONS), unit='D'
    ),
})

# Each event ends 3..14 days after it starts.
disruptions['EndDate'] = disruptions['StartDate'] + pd.to_timedelta(
    np.random.randint(3, 15, size=N_DISRUPTIONS), unit='D'
)
# Integer severity in 1..10.
disruptions['Severity'] = np.random.randint(1, 11, size=N_DISRUPTIONS)


In [8]:
N_RATINGS = 5000

# Rating dates are drawn with NumPy (seeded at the top of the notebook) instead
# of the stdlib `random` module so that they are reproducible.
# np.random.randint excludes the upper bound, so (0, 181) yields offsets of
# 0..180 days from base_date, the same range as random.randint(0, 180).
vendor_ratings = pd.DataFrame({
    'RatingID': [f"RAT{str(i).zfill(5)}" for i in range(1, N_RATINGS + 1)],
    'SupplierID': np.random.choice(suppliers['SupplierID'], N_RATINGS),
    'RatingDate': base_date + pd.to_timedelta(
        np.random.randint(0, 181, size=N_RATINGS), unit='D'
    ),
    # Integer score in 1..10.
    'Score': np.random.randint(1, 11, N_RATINGS),
    # Free-text filler sentence generated by Faker.
    'Notes': [fake.sentence(nb_words=5) for _ in range(N_RATINGS)]
})


In [9]:
# Write each generated table to its own CSV file in the current working
# directory; the DataFrame index is not written.
for frame, csv_path in (
    (suppliers, "suppliers.csv"),
    (products, "products.csv"),
    (shipments, "shipments.csv"),
    (disruptions, "disruptions.csv"),
    (vendor_ratings, "vendor_ratings.csv"),
):
    frame.to_csv(csv_path, index=False)
