In [24]:
import os

import google.generativeai as genai
import pandas as pd



# Name of the hosted model used for every generation call in this notebook.
MODEL_NAME = "gemma-3-12b-it"

# Read the API key from the environment instead of hard-coding it in the
# notebook, so the secret never ends up in a saved or shared .ipynb file.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
if not GOOGLE_API_KEY:
    # Fail here with a clear message rather than later, on the first API call.
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this notebook."
    )

genai.configure(api_key=GOOGLE_API_KEY)

model = genai.GenerativeModel(MODEL_NAME)


In [33]:
import json
import re

In [51]:

# Prompt asking the model for a domain -> company -> workflow -> component
# ontology. The schema example is itself valid JSON and spells out the exact
# keys ("companies", "workflows", "Description", "components") that the
# flattening cell further down indexes into, so the model does not have to
# guess the layout.
# NOTE(review): the "Issues must be specific..." requirement has no matching
# level in the requested hierarchy -- confirm whether an issues level is wanted.
prompt = """You are generating a structured ontology for enterprise information systems.
This ontology will be used later to generate synthetic customer support tickets.

TASK:
Generate a hierarchical JSON structure with the following levels:

1. 5 domains (e.g., E-commerce, Healthcare IT, Finance Systems, Telecom, etc.)
2. For each domain, generate exactly 1 realistic company operating in that domain.
3. For each company, generate exactly 1 core business workflow, with a realistic description of that workflow.
4. For each workflow, generate exactly 2 technical components.

REQUIREMENTS:
- All entities must be realistic and enterprise-oriented.
- Company names should sound real but be fictional.
- Workflows should reflect actual business processes.
- Components should be technical or system-level (not people).
- Issues must be specific, actionable, and suitable for support tickets.
- Keep terminology consistent across the hierarchy.
- Do NOT generate ticket text.
- Do NOT include explanations or commentary.

OUTPUT FORMAT:
Return a single valid JSON object with the following schema:

{
  "Domain Name": {
    "companies": {
      "Company Name": {
        "workflows": {
          "Workflow Name": {
            "Description": "Workflow description",
            "components": {
              "Component Name": {}
            }
          }
        }
      }
    }
  }
}

IMPORTANT:
- Generate exactly the requested counts at every level.
- Output must be valid JSON."""

In [52]:
# Send the ontology prompt to the model; the reply object is kept in `response`
# so the following cells can print and parse its text.
response = model.generate_content(prompt)

In [53]:
# Inspect the raw text of the model reply before it is parsed as JSON.
print(response.text)


```json
{
  "E-commerce": {
    "companies": {
      "NovaRetail Group": {
        "workflows": {
          "Order Fulfillment": {
            "Description": "Automated process for receiving, processing, and shipping customer orders.",
            "components": {
              "Inventory Management System": {},
              "Shipping Logistics API": {}
            }
          },
          "Returns Processing": {
            "Description": "Handles customer returns, including authorization, inspection, and refund/exchange.",
            "components": {
              "Return Authorization Portal": {},
              "Warehouse Management System": {}
            }
          }
        }
      }
    }
  },
  "Healthcare IT": {
    "companies": {
      "Apex Health Systems": {
        "workflows": {
          "Patient Appointment Scheduling": {
            "Description": "Manages patient appointment booking, reminders, and rescheduling.",
            "components": {
              "Electronic

In [58]:
def _strip_code_fences(text: str) -> str:
    """Return ``text`` without a BOM, surrounding whitespace, or a Markdown code fence.

    Whitespace is removed *before* the fence patterns are applied: they are
    anchored with ``^`` / ``$``, so a reply that starts with a blank line or
    ends with trailing spaces would otherwise keep its fences and fail to
    parse as JSON.
    """
    # Handle a BOM whether it comes before or after any leading whitespace.
    cleaned = text.strip().lstrip("\ufeff").strip()

    # Opening fence, with or without a language tag (any casing of "json").
    cleaned = re.sub(r"^```(?:json)?\s*", "", cleaned, flags=re.IGNORECASE)
    # Closing fence.
    cleaned = re.sub(r"\s*```$", "", cleaned)

    return cleaned.strip()


raw = _strip_code_fences(response.text)

# Raises json.JSONDecodeError if the model reply is not valid JSON.
data = json.loads(raw)



In [65]:
# Flatten the nested ontology into one record per (domain, company, workflow, component).
data_rows = []
for domain_name, domain_entry in data.items():
    for company_name, company_entry in domain_entry["companies"].items():
        for workflow_name, workflow_entry in company_entry["workflows"].items():
            # Looked up once per workflow, before its components are touched.
            description = workflow_entry["Description"]

            # Component names are the keys of the "components" mapping.
            data_rows.extend(
                {
                    "Domain Name": domain_name,
                    "Company Name": company_name,
                    "Workflow Name": workflow_name,
                    "Workflow Description": description,
                    "Component Name": component_name,
                }
                for component_name in workflow_entry["components"].keys()
            )

# Build the table in a single step from the collected records.
df = pd.DataFrame(data_rows)

# Show the resulting table.
print(df)

                 Domain Name              Company Name  \
0                 E-commerce          NovaRetail Group   
1                 E-commerce          NovaRetail Group   
2                 E-commerce          NovaRetail Group   
3                 E-commerce          NovaRetail Group   
4              Healthcare IT       Apex Health Systems   
5              Healthcare IT       Apex Health Systems   
6              Healthcare IT       Apex Health Systems   
7              Healthcare IT       Apex Health Systems   
8            Finance Systems   Veridian Financial Corp   
9            Finance Systems   Veridian Financial Corp   
10           Finance Systems   Veridian Financial Corp   
11           Finance Systems   Veridian Financial Corp   
12                   Telecom     Summit Communications   
13                   Telecom     Summit Communications   
14                   Telecom     Summit Communications   
15                   Telecom     Summit Communications   
16  Logistics 

In [84]:
# Draw a reproducible sample of ontology rows to use as ticket context.
# Capped at len(df) because DataFrame.sample raises when n exceeds the number
# of rows (and replace is False).
random_rows = df.sample(n=min(10, len(df)), random_state=42)

# One entry per sampled row: the row's context plus the raw model reply, kept
# so the generated tickets are not lost once the printed output scrolls away.
ticket_responses = []

for row in random_rows.to_dict('records'):
    prompt_ticket=f"""You are generating  realistic customer support ticket for an enterprise system.
        
        
    CONTEXT:
    - Domain: {row['Domain Name']}  (e.g., E-commerce, CRM)
    - Company Name: {row['Company Name']}  (e.g., Retail, Telecom)
    - Workflow Description: {row['Workflow Description']} 
    - Workflow: {row['Workflow Name']}  (e.g., Checkout, Payment Processing)
    - component: {row['Component Name']}  (e.g., Payment Gateway, User Management)
        
    TASK:
    -Write a natural-language support ticket describing the issue. 
    -The ticket text should sound like a real user complaint. 
    - Vary writing style, length, and tone.
    -The tone, language, and level of detail should reflect the user's expertise:
    A novice user will use simple language and ask for more guidance.
    An expert will use more technical terms and might ask more specific questions.
        
    Do NOT mention the system type, industry, workflow, or issue category in the ticket text.
    Give a description to the issue from 1 to 3 sentences
        
        
    OUTPUT:
    in VALID JSON FORMAT 
    -Return ticket text in natural language, without any additional explanation or metadata.
    -Affected component
    -severity of the problem"""


    response2 = model.generate_content(prompt_ticket)

    # `.text` raises ValueError when the reply carries no text part (for
    # example a blocked or empty candidate), so `or` alone cannot supply the
    # placeholder; catch the error and fall back explicitly.
    try:
        ticket_text = response2.text
    except ValueError:
        ticket_text = ""
    ticket_text = ticket_text or "[EMPTY RESPONSE]"

    ticket_responses.append({**row, "response_text": ticket_text})
    print(ticket_text)




```json
{
  "ticket_text": "We're seeing a really strange discrepancy with stock levels. The system is showing we have 15 units of the 'Deluxe Coffee Maker' in stock, but when we try to fulfill orders, it keeps telling us we're out. We've manually checked the warehouse and confirmed we have at least 20, so something's clearly wrong with the numbers being reported.",
  "affected_component": "Inventory Management System",
  "severity": "High"
}
```
```json
{
  "ticket_text": "I'm trying to track a shipment using the tracking number XYZ123456789, but the portal keeps showing 'Data unavailable'. I've refreshed the page multiple times and double-checked the tracking number, so I'm pretty sure it's entered correctly. Is there a known issue with the system, or am I missing something?",
  "affected_component": "Shipment Visibility Portal",
  "severity": "Medium"
}
```
```json
{
  "ticket_text": "Hi, I'm seeing some really strange discrepancies in the invoices generated for several of our busin

In [85]:
# Show the sampled rows that served as context for the ticket prompts.
print(random_rows)

                 Domain Name              Company Name  \
0                 E-commerce          NovaRetail Group   
17  Logistics & Supply Chain  Global Transit Solutions   
15                   Telecom     Summit Communications   
1                 E-commerce          NovaRetail Group   
8            Finance Systems   Veridian Financial Corp   
5              Healthcare IT       Apex Health Systems   
11           Finance Systems   Veridian Financial Corp   
3                 E-commerce          NovaRetail Group   
18  Logistics & Supply Chain  Global Transit Solutions   
16  Logistics & Supply Chain  Global Transit Solutions   

                       Workflow Name  \
0                  Order Fulfillment   
17                 Shipment Tracking   
15          Billing Cycle Processing   
1                  Order Fulfillment   
8        Loan Application Processing   
5     Patient Appointment Scheduling   
11                   Fraud Detection   
3                 Returns Processing   
1