# Data Simulation Building Blocks

This notebook demonstrates the current building blocks of data simulation.

In [1]:
pip install llm_connect

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install torch

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip show torch

Name: torch
Version: 2.1.2
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorch.org
License: BSD-3
Location: /Users/nujoumunus/Library/Python/3.9/lib/python/site-packages
Requires: filelock, fsspec, jinja2, networkx, sympy, typing-extensions
Required-by: accelerate, llm_connect, sentence-transformers
Note: you may need to restart the kernel to use updated packages.


In [4]:
! which python3

/usr/bin/python3


In [5]:
! python3 -m pip show llm_connect

Name: llm_connect
Version: 2.0.0
Summary: LLM Connect API
Home-page: 
Author: CeADAR Connect Group
Author-email: 
License: 
Location: /Users/nujoumunus/Library/Python/3.9/lib/python/site-packages
Requires: accelerate, bitsandbytes, click, colorama, setuptools, torch, transformers
Required-by: 


In [6]:
import sys
sys.path.append('/Users/nujoumunus/Library/Python/3.9/lib/python/site-packages')


In [7]:
import sys
print(sys.executable)


/Library/Developer/CommandLineTools/usr/bin/python3


In [8]:
!{sys.executable} -m pip install llm_connect


Defaulting to user installation because normal site-packages is not writeable


In [9]:
pip list | grep llm_connect

llm_connect               2.0.0
Note: you may need to restart the kernel to use updated packages.


In [13]:
pip install dotenv

Defaulting to user installation because normal site-packages is not writeable
Collecting dotenv
  Downloading dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)
Collecting python-dotenv (from dotenv)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading dotenv-0.9.9-py2.py3-none-any.whl (1.9 kB)
Downloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv, dotenv
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [dotenv]
[1A[2KSuccessfully installed dotenv-0.9.9 python-dotenv-1.1.0
Note: you may need to restart the kernel to use updated packages.


In [14]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# import pyrsm as rsm

# sys.path.append("../code/")
# import llm_connect

# from llm_connect import get_response
from dotenv import load_dotenv

load_dotenv()

np.random.seed(1234)

#### Data Simulation

In [15]:
import numpy as np
import pandas as pd

# Simulated LLM response (replace this with actual get_response() if LLM available)
def get_response(inputs, template, role=None, llm="llama", md=False):
    # MOCK SIMULATION: This should be replaced with actual LLM output
    region, expertise = inputs
    return f"{region} {expertise.split(',')[0]} Solutions Ltd."

def get_name_response(region, template, role=None, llm="llama", md=False):
    # MOCK SIMULATION: This should be replaced with actual LLM output
    fake_names = {
        "Japan": "Hiroshi Tanaka", "India – South": "Arjun Mehta", "Spain": "Rafael Dominguez",
        "USA – East": "Emily Carter", "China": "Mei Lin", "Brazil": "Carlos Silva",
        "Kuwait": "Ali Faris", "Canada": "Sophie Dubois", "Italy": "Giovanni Russo",
        "Mexico": "Luis Herrera", "Singapore": "Jia Wei", "Vietnam": "Thien Bao",
        "Malaysia": "Aiman Razak", "South Korea": "Min-Joon Park", "Saudi Arabia": "Fatimah Al-Dossary",
        "UAE – West": "Zayd Al-Maktoum"
    }
    return fake_names.get(region, f"Contact from {region}")

# Set seed for reproducibility
np.random.seed(42)

# Number of vendor records to simulate
n_vendors = 25

# Sample pools
vendor_regions = [
    "USA – East", "Brazil", "Singapore", "South Korea", "India – South", "Malaysia",
    "Spain", "Saudi Arabia", "Italy", "Kuwait", "Canada", "Japan", "Mexico", "UAE – West", "China", "Vietnam"
]

certification_pool = [
    "ISO 9001", "FM Approved", "UL Listed", "API Monogram", 
    "CE", "ASME", "Local Safety Compliance", "Material Test Report"
]

expertise_pool = [
    "Control Cable", "Lighting Fixture", "Structural Steel", "Cable Tray",
    "HVAC Duct Connection", "Flanges", "Valves", "Fire Rated Door", "Earthing Strip"
]

# Initialize empty list to collect each row
vendor_rows = []

for i in range(n_vendors):
    region = np.random.choice(vendor_regions)
    expertise = ", ".join(np.random.choice(expertise_pool, size=np.random.randint(1, 3), replace=False))
    vendor_name = get_response([region, expertise], template="generate realistic vendor name")
    contact_name = get_name_response(region, template="generate realistic contact name")
    certifications = ", ".join(np.random.choice(certification_pool, size=np.random.randint(2, 5), replace=False))
    lead_time = np.random.randint(12, 45)

    vendor_rows.append({
        "Vendor Name": vendor_name,
        "Region": region,
        "Contact Name": contact_name,
        "Contact Email": f"{contact_name.lower().replace(' ', '.')}_{i+1}@{vendor_name.lower().replace(' ', '').replace(',', '')}.com",
        "Certifications": certifications,
        "Expertise": expertise,
        "Avg Lead Time (days)": lead_time
    })

# Create DataFrame
df_vendors = pd.DataFrame(vendor_rows)

# Preview
df_vendors.head()

Unnamed: 0,Vendor Name,Region,Contact Name,Contact Email,Certifications,Expertise,Avg Lead Time (days)
0,Spain Flanges Solutions Ltd.,Spain,Rafael Dominguez,rafael.dominguez_1@spainflangessolutionsltd..com,"ISO 9001, Local Safety Compliance","Flanges, Control Cable",13
1,Vietnam Flanges Solutions Ltd.,Vietnam,Thien Bao,thien.bao_2@vietnamflangessolutionsltd..com,"ISO 9001, ASME, Material Test Report, CE","Flanges, Structural Steel",14
2,India – South Fire Rated Door Solutions Ltd.,India – South,Arjun Mehta,arjun.mehta_3@india–southfirerateddoorsolution...,"Material Test Report, ASME, UL Listed",Fire Rated Door,18
3,Japan HVAC Duct Connection Solutions Ltd.,Japan,Hiroshi Tanaka,hiroshi.tanaka_4@japanhvacductconnectionsoluti...,"UL Listed, ISO 9001, CE",HVAC Duct Connection,29
4,Kuwait HVAC Duct Connection Solutions Ltd.,Kuwait,Ali Faris,ali.faris_5@kuwaithvacductconnectionsolutionsl...,"UL Listed, ISO 9001","HVAC Duct Connection, Structural Steel",40


In [16]:
# Mock get_response to simulate LLM output (replace with actual LLM if available)
def get_response(inputs, template, role=None, llm="llama", md=False):
    vendor_name, region, certifications, expertise, lead_time = inputs
    return f"{vendor_name}, based in {region}, has a proven track record delivering {expertise.lower()} with certifications such as {certifications.split(',')[0]} and more. Their average lead time is {lead_time} days."

# Define vendor history template (optional if using inline like above)
def vendor_history_template(inputs):
    vendor_name, region, certifications, expertise, lead_time = inputs
    return f"""Generate a 1–2 sentence vendor history summary for procurement records.
Vendor Name: {vendor_name}
Region: {region}
Certifications: {certifications}
Expertise: {expertise}
Average Lead Time: {lead_time} days
Make the history sound realistic and professional — referencing onboarding, reliability, or performance if possible."""

# Apply LLM-generated history to the dataframe
df_vendors["History"] = df_vendors.apply(
    lambda row: get_response(
        [
            row["Vendor Name"],
            row["Region"],
            row["Certifications"],
            row["Expertise"],
            row["Avg Lead Time (days)"]
        ],
        template=vendor_history_template,
        role="You are a helpful procurement assistant generating brief vendor history notes.",
        llm="llama",
        md=False,
    ),
    axis=1
)

# Preview the updated DataFrame
df_vendors[["Vendor Name", "Region","Contact Name","Contact Email", "Expertise","Certifications", "Avg Lead Time (days)", "History"]].head(20)


Unnamed: 0,Vendor Name,Region,Contact Name,Contact Email,Expertise,Certifications,Avg Lead Time (days),History
0,Spain Flanges Solutions Ltd.,Spain,Rafael Dominguez,rafael.dominguez_1@spainflangessolutionsltd..com,"Flanges, Control Cable","ISO 9001, Local Safety Compliance",13,"Spain Flanges Solutions Ltd., based in Spain, ..."
1,Vietnam Flanges Solutions Ltd.,Vietnam,Thien Bao,thien.bao_2@vietnamflangessolutionsltd..com,"Flanges, Structural Steel","ISO 9001, ASME, Material Test Report, CE",14,"Vietnam Flanges Solutions Ltd., based in Vietn..."
2,India – South Fire Rated Door Solutions Ltd.,India – South,Arjun Mehta,arjun.mehta_3@india–southfirerateddoorsolution...,Fire Rated Door,"Material Test Report, ASME, UL Listed",18,"India – South Fire Rated Door Solutions Ltd., ..."
3,Japan HVAC Duct Connection Solutions Ltd.,Japan,Hiroshi Tanaka,hiroshi.tanaka_4@japanhvacductconnectionsoluti...,HVAC Duct Connection,"UL Listed, ISO 9001, CE",29,"Japan HVAC Duct Connection Solutions Ltd., bas..."
4,Kuwait HVAC Duct Connection Solutions Ltd.,Kuwait,Ali Faris,ali.faris_5@kuwaithvacductconnectionsolutionsl...,"HVAC Duct Connection, Structural Steel","UL Listed, ISO 9001",40,"Kuwait HVAC Duct Connection Solutions Ltd., ba..."
5,China Lighting Fixture Solutions Ltd.,China,Mei Lin,mei.lin_6@chinalightingfixturesolutionsltd..com,Lighting Fixture,"API Monogram, FM Approved, CE, ASME",39,"China Lighting Fixture Solutions Ltd., based i..."
6,Spain Earthing Strip Solutions Ltd.,Spain,Rafael Dominguez,rafael.dominguez_7@spainearthingstripsolutions...,Earthing Strip,"ASME, FM Approved, API Monogram, Material Test...",25,"Spain Earthing Strip Solutions Ltd., based in ..."
7,Spain HVAC Duct Connection Solutions Ltd.,Spain,Rafael Dominguez,rafael.dominguez_8@spainhvacductconnectionsolu...,HVAC Duct Connection,"UL Listed, CE, FM Approved, ASME",34,"Spain HVAC Duct Connection Solutions Ltd., bas..."
8,Spain Control Cable Solutions Ltd.,Spain,Rafael Dominguez,rafael.dominguez_9@spaincontrolcablesolutionsl...,Control Cable,"Local Safety Compliance, UL Listed",22,"Spain Control Cable Solutions Ltd., based in S..."
9,Vietnam Lighting Fixture Solutions Ltd.,Vietnam,Thien Bao,thien.bao_10@vietnamlightingfixturesolutionslt...,"Lighting Fixture, Structural Steel","ASME, FM Approved, Local Safety Compliance",19,"Vietnam Lighting Fixture Solutions Ltd., based..."


In [17]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Seed for reproducibility
np.random.seed(42)

# Number of items to simulate
n_items = 20

# Pools of realistic values
item_names_pool = [
    "Carbon Steel Flange", "Gate Valve", "HVAC Duct Connection", "Cable Tray", "Earthing Strip",
    "Structural Steel Beam", "Fire Rated Door", "Control Cable", "Lighting Fixture", "Signal Conduit"
]

spec_pool = {
    "Carbon Steel Flange": ["ASTM A105, PN16", "ASTM A182, CL150"],
    "Gate Valve": ["API 600, 6-inch", "API 602, 4-inch"],
    "HVAC Duct Connection": ["Flexible, 300mm", "Flexible, 500mm"],
    "Cable Tray": ["Ladder Type, GI", "Perforated, AL"],
    "Earthing Strip": ["Copper, 25mm x 3mm", "GI, 50mm x 6mm"],
    "Structural Steel Beam": ["IPE 300, Hot Rolled", "HEB 200, Mild Steel"],
    "Fire Rated Door": ["120 min fire resistance", "90 min fire rated, insulated"],
    "Control Cable": ["CU/PVC/PVC, 4C x 2.5 sqmm", "XLPE 2C x 4 sqmm"],
    "Lighting Fixture": ["IP66, LED, 230V", "IP54, Fluorescent, 120V"],
    "Signal Conduit": ["PVC, 50mm dia", "HDPE, 32mm dia"]
}

units = {
    "EA": ["Carbon Steel Flange", "Gate Valve", "Structural Steel Beam", "Lighting Fixture", "Fire Rated Door", "HVAC Duct Connection"],
    "MTR": ["Cable Tray", "Earthing Strip", "Control Cable", "Signal Conduit"]
}

drawing_prefixes = {
    "Carbon Steel Flange": "MECH", "Gate Valve": "MECH", "HVAC Duct Connection": "HVAC",
    "Cable Tray": "ELEC", "Earthing Strip": "ELEC", "Structural Steel Beam": "STRC",
    "Fire Rated Door": "ARCH", "Control Cable": "ELEC", "Lighting Fixture": "LGT",
    "Signal Conduit": "SIG"
}

# Generate simulated item data
simulated_items = []

for i in range(n_items):
    item = random.choice(item_names_pool)
    spec = random.choice(spec_pool[item])
    qty = random.randint(30, 1000)
    unit = "EA" if item in units["EA"] else "MTR"
    delivery_date = datetime(2025, 8, 30).strftime("%Y-%m-%d")
    drawing_ref = f"DM-{drawing_prefixes[item]}-{str(i+1).zfill(3)}"

    simulated_items.append({
        "Item Name": item,
        "Specification": spec,
        "Quantity": qty,
        "Unit of Measure": unit,
        "Delivery Date": delivery_date,
        "Drawing Ref": drawing_ref
    })

# Create DataFrame
df_items = pd.DataFrame(simulated_items)

# Preview
df_items


Unnamed: 0,Item Name,Specification,Quantity,Unit of Measure,Delivery Date,Drawing Ref
0,Control Cable,"CU/PVC/PVC, 4C x 2.5 sqmm",917,MTR,2025-08-30,DM-ELEC-001
1,Gate Valve,"API 600, 6-inch",369,EA,2025-08-30,DM-MECH-002
2,Carbon Steel Flange,"ASTM A105, PN16",886,EA,2025-08-30,DM-MECH-003
3,Signal Conduit,"HDPE, 32mm dia",378,MTR,2025-08-30,DM-SIG-004
4,Structural Steel Beam,"HEB 200, Mild Steel",234,EA,2025-08-30,DM-STRC-005
5,HVAC Duct Connection,"Flexible, 500mm",131,EA,2025-08-30,DM-HVAC-006
6,Structural Steel Beam,"HEB 200, Mild Steel",153,EA,2025-08-30,DM-STRC-007
7,Control Cable,"CU/PVC/PVC, 4C x 2.5 sqmm",148,MTR,2025-08-30,DM-ELEC-008
8,HVAC Duct Connection,"Flexible, 300mm",486,EA,2025-08-30,DM-HVAC-009
9,Gate Valve,"API 602, 4-inch",80,EA,2025-08-30,DM-MECH-010
