In [None]:
pip install Faker

Collecting Faker
  Downloading faker-37.4.2-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.4.2-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Faker
Successfully installed Faker-37.4.2


In [None]:
import pandas as pd
from faker import Faker
import random
from datetime import datetime, timedelta

# Fixed seed so "Restart Kernel -> Run All" repeats the same random draws.
# Note: OrderDate is sampled relative to today's date ('-2y' .. 'today'), so the
# date columns still shift when the notebook is run on a different day.
SEED = 42

fake = Faker()
Faker.seed(SEED)   # seeds the random generator used by Faker providers
random.seed(SEED)  # seeds the stdlib generator used for prices, quantities, choices

num_records = 1000

supplier_categories = ['IT Services', 'Medical Supplies', 'Office Equipment',
                       'Consulting', 'Facilities Maintenance', 'Logistics']
departments = ['Finance', 'HR', 'IT', 'Operations', 'Patient Care', 'Admin']

# --- Generate dummy data ---
# Build a list of row dicts and construct the DataFrame once at the end.
data = []
for i in range(num_records):
    # Order placed some time in the last two years; delivery 3-30 days later.
    order_date = fake.date_between(start_date='-2y', end_date='today')
    delivery_date = order_date + timedelta(days=random.randint(3, 30))

    unit_price = round(random.uniform(10, 1000), 2)
    quantity = random.randint(1, 10)
    total_price = round(unit_price * quantity, 2)

    # Simulate potential savings: ~30% of transactions get 5-20% of the total
    # flagged as identified savings. Float default keeps the column dtype stable.
    savings_identified = 0.0
    if random.random() < 0.3:
        savings_identified = round(total_price * random.uniform(0.05, 0.20), 2)

    # Simulate realized savings: a random fraction (0-100%) of what was identified.
    savings_realized = 0.0
    if savings_identified > 0:
        savings_realized = round(savings_identified * random.uniform(0.0, 1.0), 2)

    # ~70% of transactions reference a contract. `fake.unique` draws from only
    # 9,000 possible values (1000-9999) and raises UniquenessException once the
    # pool is exhausted, so widen the range before raising num_records far
    # beyond its current value.
    if random.random() < 0.7:
        contract_reference = f'CON{fake.unique.random_int(min=1000, max=9999)}'
    else:
        contract_reference = None

    data.append({
        'TransactionID': f'TRX{i+1:05d}',
        'OrderDate': order_date,
        'DeliveryDate': delivery_date,
        'SupplierName': fake.company(),
        'SupplierCategory': random.choice(supplier_categories),
        'Department': random.choice(departments),
        'ItemDescription': fake.word().capitalize() + ' Service/Product',
        'Quantity': quantity,
        'UnitPrice': unit_price,
        'TotalPrice': total_price,
        'ContractReference': contract_reference,
        'SavingsIdentified': savings_identified,
        'SavingsRealized': savings_realized,
        'PaymentTermsDays': random.choice([30, 60, 90])
    })

df_procurement = pd.DataFrame(data)

print("First 5 rows of generated data:")
print(df_procurement.head())
print("\nDataFrame Info:")
df_procurement.info()

First 5 rows of generated data:
  TransactionID   OrderDate DeliveryDate                  SupplierName  \
0      TRX00001  2025-04-01   2025-04-13  Estrada, Valentine and Baker   
1      TRX00002  2023-07-19   2023-07-26    Davis, Miles and Rodriguez   
2      TRX00003  2023-07-25   2023-08-18                   Jenkins PLC   
3      TRX00004  2025-04-29   2025-05-20                   Green-Smith   
4      TRX00005  2024-10-17   2024-10-21                     Avery PLC   

         SupplierCategory    Department           ItemDescription  Quantity  \
0             IT Services  Patient Care  Southern Service/Product         3   
1  Facilities Maintenance  Patient Care   Country Service/Product         5   
2              Consulting    Operations   General Service/Product         3   
3        Medical Supplies            IT   Respond Service/Product         3   
4        Medical Supplies       Finance  Material Service/Product         6   

   UnitPrice  TotalPrice ContractReference  Savi

In [None]:
# Write the generated DataFrame to a CSV file in the current working directory.
csv_file_name = 'dummy_procurement_spend_data.csv'
df_procurement.to_csv(csv_file_name, index=False)

# The browser download helper only exists on Google Colab. Guard the import so
# the cell still completes (with the CSV saved) on a local Jupyter kernel.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    # Trigger a browser download of the file directly from Colab.
    files.download(csv_file_name)
    print(f"\n'{csv_file_name}' has been generated and is ready for download.")
else:
    print(f"\n'{csv_file_name}' has been generated in the current working directory.")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


'dummy_procurement_spend_data.csv' has been generated and is ready for download.
