In [5]:
import pandas as pd
import numpy as np

import random
from faker import Faker


In [6]:
# Seed both random sources used in this notebook (the stdlib `random` module
# and Faker's shared generator) so that "Restart & Run All" reproduces the
# same mock records. Dates given relative to "today" still depend on the day
# the notebook is executed.
SEED = 42
random.seed(SEED)

fake = Faker()
Faker.seed(SEED)

# Company names that mock entries are randomly assigned to
companies = [
    "adp", "adp Espelkamp", "adp Lübbecke", "BEIT", "Casino Merkur", "Spielo",
    "MFL", "MEGA", "Gewete", "Blueprint Gaming",
]

def generate_document_id():
    """Return a random 10-character ID made of capital letters A-Z and digits 0-9.

    Characters are drawn independently with replacement, so repeats within
    one ID are possible.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    picked = random.choices(alphabet, k=10)
    return ''.join(picked)

def generate_ais_number(project, year):
    """Build a random AIS number of the form ``<year>-<XXX>-<NNNNN>``.

    ``XXX`` is three random capital letters and ``NNNNN`` is a random number
    from 1 to 99999, left-padded with zeros to five digits.

    Args:
        project: Accepted but not used; the three-letter code is random and
            is not derived from this value.
        year: Value placed at the front of the number.

    Returns:
        The formatted AIS number as a string.
    """
    letters = random.choices('ABCDEFGHIJKLMNOPQRSTUVWXYZ', k=3)
    number = random.randint(1, 99999)
    code = ''.join(letters)
    padded = str(number).zfill(5)
    return f"{year}-{code}-{padded}"

# SOM workflow states: som_state_codes[i] is the numeric code that belongs to
# the description som_state_texts[i], so both lists must keep the same length.
som_state_codes = [
    10,
    30,
    50,
    70,
    80,
    90,
    110,
]
som_state_texts = [
    "Neu",
    "Anfrage in Bearbeitung",
    "Angebot erstellt",
    "Anfrage erledigt",
    "Anfrage abgelehnt",
    "Angebot abgelehnt",
    "Abgeschlossen",
]

# NOTE(review): "stete_codes" looks like a typo for "state_codes". The list is
# empty and is not referenced in the visible part of the notebook.
stete_codes = []
# Author's note (translated from German), apparently labelling the phases of
# the entries below: request, offer created, realization phase (x3), ?,
# order acceptance, test phase
state_texts = [
    "Erstellen Angebot",
    "Angebot genehmigen",
    "Auftragsbestätigung versenden",
    "Zweite Genehmigung",
    "Auftragsdurchführung",
    "Abnahmeprotokoll versenden",
    "Auftrag abnehmen",
    "Kundentest",
    "Auftrag ist abgeschlossen",
    "Auftragsabschluiss",  # NOTE(review): probably meant "Auftragsabschluss"; kept unchanged
]

# Number of SOM entries to generate
full_data_size = 5000
# Collects one dict per generated record
full_data = []

# Generate SOM entries (Source == 1). About half of them additionally get an
# AIS entry (Source == 2) that is a copy of the SOM entry with the AIS-only
# fields filled in, so both rows share the same DocumentID.
for _ in range(full_data_size):
    document_id = generate_document_id()
    company = random.choice(companies)
    creation_date = fake.date_between(start_date="-2y", end_date="today")
    # First word of a Faker "bs" phrase serves as the project name
    project = fake.bs().split(' ')[0]
    year = creation_date.year
    # Index over the whole state table so that every state, including the
    # last one, can be drawn (a fixed upper bound of 5 skipped index 6).
    stateID = random.randrange(len(som_state_codes))
    som_entry = {
        "AISNumber": "",
        "DocumentID": document_id,
        "Company": company,
        "CompetenceCenter": fake.word(),
        "Project": project,
        "Theme": fake.sentence(nb_words=5),
        "Editor": fake.name(),
        "Approver": "",
        "StateText": "",
        "StateCode1": None,
        "StateCode2": None,
        "Requestor": fake.name(),
        # Never earlier than the creation date, at most 30 days from today
        "ConceptDeliveryDate": fake.date_between(start_date=creation_date, end_date="+30d"),
        "AgreedDeliveryDate": None,
        "Comment": fake.text(),
        "AmountInEuroByOffer": None,
        # Code and text come from the same index, so they always match
        "SomStateCode": som_state_codes[stateID],
        "SomStateText": som_state_texts[stateID],
        "TotalAmountInEuro": None,
        "AmountInHoursByOffer": None,
        "ActualAmountInHours": None,
        "OfferDate": None,
        "FinishDate": None,
        "ApprovalDate": None,
        "Coordinator": fake.name(),
        "RequestType": fake.word(),
        "Customer": fake.name(),
        "CreationDate": creation_date,
        "Team": fake.word(),
        "FunctionalDepartment": fake.word(),
        "ProjectID": fake.uuid4(),
        "ProjectTitle": fake.sentence(),
        "Länder": fake.country(),
        "Source": 1  # SOM source
    }
    full_data.append(som_entry)

    # Randomly decide if this SOM entry will have a corresponding AIS entry
    if random.choice([True, False]):  # 50% chance to have an AIS entry
        # Shallow copy is sufficient: update() rebinds keys, nothing is mutated in place
        ais_entry = som_entry.copy()
        ais_entry.update({
            "AISNumber": generate_ais_number(project, year),
            "Approver": fake.name(),
            "StateText": random.choice(som_state_texts),  # Assuming AIS uses similar state texts
            "StateCode1": random.randint(1, 100),
            "StateCode2": random.randint(1, 100),
            # On or after the concept delivery date, at most 30 days from today
            "AgreedDeliveryDate": fake.date_between(start_date=som_entry["ConceptDeliveryDate"], end_date="+30d"),
            "AmountInEuroByOffer": round(random.uniform(1000, 50000), 2),
            "TotalAmountInEuro": round(random.uniform(10000, 200000), 2),
            "AmountInHoursByOffer": round(random.uniform(10, 200), 2),
            "ActualAmountInHours": round(random.uniform(5, 100), 2),
            "OfferDate": fake.date_between(start_date=som_entry["CreationDate"], end_date="+1y"),
            "FinishDate": fake.date_between(start_date="today", end_date="+1y"),
            "ApprovalDate": fake.date_between(start_date=som_entry["CreationDate"], end_date="today"),
            "Source": 2  # AIS source
        })
        full_data.append(ais_entry)

# Convert the full list of dictionaries into a DataFrame (one row per entry)
full_df = pd.DataFrame(full_data)


In [7]:
### Save the generated data twice: as CSV (without the index column) and as
### line-delimited JSON (one record per line)
full_df.to_csv(
    path_or_buf="EntryService-MockEntry.csv",
    index=False,
)
full_df.to_json(
    path_or_buf="mock_data.json",
    orient="records",
    lines=True,
)