In [1]:
import sys
from pathlib import Path
import os
# Add project root to sys.path
sys.path.append(str(Path().resolve().parent))  # Adjust if needed

import numpy as np
import pandas as pd
from datetime import datetime as dt
from db.db_operations import execute_db_operations
from io import StringIO

from generators.llm_context_generators import (
    generate_year_end_report_with_web_llm,
    generate_context_numbers_llm,
    generate_year_end_report_from_pdf,
)

from generators.full_generators import (
    create_company_data
)

# Run configuration shared by the generation cells below.
company_name = "LEGO"
# Initial dataset sizes. Both names are reassigned in a later cell from the
# dict returned by generate_context_numbers_llm.
count_employee = 50
count_product = 50


In [2]:
# Fetch the context numbers for company_name (a dict of entity counts and
# estimated amounts, as shown in the next cell's output).
data = generate_context_numbers_llm(company_name)

In [3]:
# Display the current value of `data` for inspection.
data

{'company_name': 'LEGO',
 'count_employee': 300,
 'count_department': 15,
 'count_customer': 150,
 'count_product': 90,
 'count_procurement': 90,
 'count_service': 70,
 'count_account': 145,
 'estimated_product': 233000000,
 'estimated_service': 97400000,
 'estimated_overhead': 38900000,
 'estimated_revenue': 649000000}

In [None]:
# Generate the full company dataset and save it as CSV.
# NOTE: this rebinds `data`, which previously held the context-numbers dict.
# NOTE(review): the recorded run below hit "Request timed out" on attempts 1
# and 2 and shows no result for attempt 3.
data = create_company_data(company_name=company_name, save_to_csv=True)

Generating data for company: LEGO...
Time estimates: 5-10 minutes for context, 15-25 minutes for full data generation.
Using existing context report for LEGO.

=== Attempt 1 ===
✔ Roles and Names generated.
✔ Procurement data generated.
✔ Services data generated.
✔ Products data generated.
✖ Generation failed: Runtime error: Request timed out.

=== Attempt 2 ===
✔ Roles and Names generated.
✖ Generation failed: Runtime error: Request timed out.

=== Attempt 3 ===


In [3]:
# Fetch the context numbers for company_name again.
# NOTE(review): the recorded outputs for LEGO differ between the runs kept in
# this notebook (e.g. count_employee 300 / 1000 / 5), so the result is
# presumably not deterministic — confirm before relying on a single run.
data = generate_context_numbers_llm(company_name=company_name)

In [4]:
# Display the current value of `data` for inspection.
data

{'company_name': 'LEGO',
 'count_employee': 1000,
 'count_department': 10,
 'count_customer': 5000,
 'count_product': 200,
 'count_procurement': 120,
 'count_service': 60,
 'count_account': 180,
 'estimated_product': 35000000,
 'estimated_service': 2000000,
 'estimated_overhead': 442000000,
 'estimated_revenue': 5000000000}

In [15]:
# Re-declare the target company; same value as assigned in the first cell.
company_name = "LEGO"

In [5]:
# Build the year-end context report text for company_name and keep it in
# `report`, because the later cell that writes the report to disk reads that
# name. The bare `report` on the last line keeps the cell output visible.
# NOTE(review): generate_context_report is not among the names imported in the
# first cell — add the import from its defining module before a fresh run.
report = generate_context_report(company_name=company_name)
report

Loading and reading year-end report for LEGO.
This usually takes around 5 mins...
Year-end report for LEGO is ready.


'**LEGO Group Denmark: 2024 Year-End Financial Summary**\n\nIn 2024, the LEGO Group’s Denmark operations delivered a robust financial performance, building on a strong global market presence and continued strategic investment. The year saw record revenues and profits, reflecting both resilient consumer demand and disciplined operational management. Market share increased globally, highlighting the strength of LEGO’s brand and product innovation.\n\nRevenue for the Denmark scope reached DKK 649 million, up from DKK 596 million in 2023, representing a year-on-year growth of 8.9%. Gross profit mirrored revenue, at DKK 649 million (DKK 596 million in 2023), demonstrating effective management of production costs and pricing. Operating profit held steady at DKK 207 million, compared with DKK 210 million in the prior year, as LEGO balanced investments with cost control.\n\nAdministrative expenses rose to DKK 442 million in Denmark (DKK 386 million in 2023), reflecting the cost of supporting e

In [2]:
# Fetch the context numbers for company_name (same call as in earlier cells).
data = generate_context_numbers_llm(company_name=company_name)

In [3]:
# Display the current value of `data` for inspection.
data

{'company_name': 'LEGO',
 'count_employee': 5,
 'count_department': 3,
 'count_customer': 30,
 'count_product': 0,
 'count_procurement': 15,
 'count_service': 4,
 'count_account': 120,
 'estimated_product': 0,
 'estimated_service': 152000000,
 'estimated_overhead': 290000000,
 'estimated_revenue': 649000000}

In [None]:
# Persist the context report as UTF-8 text, one file per company.
report_path = Path(f"data/inputdata/reports/generated/{company_name}_context_report.txt")
with report_path.open("w", encoding="utf-8") as report_file:
    report_file.write(str(report))

In [None]:
# Generate the full company dataset for company_name and save it as CSV.
# NOTE: this rebinds `data` to the return value of create_company_data.
data = create_company_data(company_name, save_to_csv=True)

In [3]:
# Switch the run to Mærsk. company_name is updated here as well, because the
# create_company_data cell that follows passes company_name rather than a
# literal; without this a fresh kernel would still generate data for "LEGO".
company_name = "Mærsk"
data = generate_context_numbers_llm(company_name=company_name)

# Unpack the generated sizes into individual variables (each key read once).
# Only count_employee and count_product are passed on to create_company_data.
count_employee = data['count_employee']
count_product = data['count_product']
count_department = data['count_department']
count_procurement = data['count_procurement']
count_service = data['count_service']
count_account = data['count_account']
count_customer = data['count_customer']

In [4]:
# Display the current value of `data` for inspection.
data

{'company_name': 'Mærsk',
 'country': 'Denmark',
 'count_employee': 25,
 'count_department': 5,
 'count_customer': 60,
 'count_product': 12,
 'count_procurement': 30,
 'count_service': 8,
 'count_account': 10,
 'estimated_product': 1200000,
 'estimated_service': 800000,
 'estimated_overhead': 700000,
 'estimated_revenue': 2300000}

In [5]:
# Generate the company dataset using the employee and product counts taken
# from the context numbers, and save the result as CSV.
data = create_company_data(
    company_name,
    count_employee=count_employee,
    count_product=count_product,
    save_to_csv=True,
)

Generating data for company: Mærsk...
✔ Roles and Names generated.
✔ Procurement data generated.
✔ Services data generated.
✔ Products data generated.
✔ Accounts, Customers, Departments, and Vendors generated.
✔ Payroll data generated.
✔ All CSVs saved to: data/outputdata/
✔ All mapping data generated.
✔ All mapping CSVs saved to: data/outputdata/mapping
YESYES
✔ All erp-data generated.
✔ All ERP CSVs saved to: data/outputdata/fact
✔ All ERP data and mapping generated for company: Mærsk


In [6]:
# Tag this load with the lower-cased company name plus a YYYYMMDDHHMM stamp.
# The previous format ("%h%m%d%M") relied on the platform-specific %h
# directive (locale-dependent month abbreviation) and left out the year and
# the hour, so two runs on the same day at the same minute past different
# hours produced the same tag.
version_tag = company_name.lower() + dt.now().strftime("%Y%m%d%H%M")
execute_db_operations(version_tag)

[INFO] Inserting 10 rows into dim_department using to_sql...
[INFO] Inserting 30 rows into dim_customer using to_sql...
[INFO] Inserting 100 rows into dim_product using to_sql...
[INFO] Inserting 30 rows into dim_account using to_sql...
[INFO] Inserting 50 rows into dim_procurement using to_sql...
[INFO] Inserting 50 rows into dim_service using to_sql...
[INFO] Inserting 14 rows into dim_line using to_sql...
[INFO] Inserting 30 rows into dim_vendor using to_sql...


  .replace(mapping)


[INFO] Inserting 50 rows into dim_employee using to_sql...


  .replace(mapping)


[INFO] Inserting 50400 rows into fact_payroll using to_sql...


  .replace(mapping)
  .replace(mapping)
  .replace(mapping)
  .replace(mapping)
  .replace(mapping)
  .replace(mapping)


[INFO] Inserting 6180 rows into fact_general_ledger using to_sql...


In [3]:
# Install SQLAlchemy into the running kernel's environment (no version pinned).
# NOTE(review): this cell sits after the DB load cell in the notebook;
# presumably it belongs with the setup at the top — confirm.
%pip install sqlalchemy

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [10]:
# Generate the year-end report for Mærsk via the web-LLM generator.
# NOTE(review): the recorded run of this cell failed with a 400
# BadRequestError ('temperature' 0.4 is not supported by the model). The
# temperature is not set in this cell, so it is presumably configured inside
# the generator — confirm and fix there.
data = generate_year_end_report_with_web_llm("Mærsk")

BadRequestError: Error code: 400 - {'error': {'message': "Unsupported value: 'temperature' does not support 0.4 with this model. Only the default (1) value is supported.", 'type': 'invalid_request_error', 'param': 'temperature', 'code': 'unsupported_value'}}