In [1]:
import uuid
import random
import pandas as pd
import numpy as np
from datetime import datetime
from create_sap_table import create_table_leanx as ctl

import values, helpers
import master_data, text_data, purchasing_doc_data

In [2]:
# # text tables
# for method in (
#     text_data.domain_fixed_values, 
#     text_data.sales_doc_types, 
#     text_data.sales_organizations,
#     text_data.distribution,
#     text_data.sales_doc_item_categories,
#     text_data.sales_doc_rejection_reasons,
#     text_data.system_status,
#     text_data.blocking_reasons,
#     text_data.releases,
#     text_data.purchasing_doc_types,
#     text_data.purchasing_organizations
# ):
#     table_dict = method()
#     for k, v in table_dict.items():
#         table_name = k.split('_')[0]
#         all_cols = pd.DataFrame(columns=[c[0] for c in ctl.fetch_table(table_name)])
#         df = pd.concat([all_cols, pd.DataFrame(v.values())])
#         df.to_csv(f'data/P2P/OCPM/text/{table_name}.csv', index=False)

In [3]:
# # master tables
# for method in (
#     master_data.users,
#     master_data.customers_and_vendors, 
#     master_data.plants, 
#     master_data.materials, 
#     master_data.material_support,
#     master_data.routes,
#     master_data.company_codes
# ):
#     table_dict = method()
#     for k, v in table_dict.items():
#         table_name = k.split('_')[0]
#         all_cols = pd.DataFrame(columns=[c[0] for c in ctl.fetch_table(table_name)])
#         df = pd.concat([all_cols, pd.DataFrame(v.values())])
#         df.to_csv(f'data/P2P/OCPM/master/{table_name}.csv', index=False)

In [4]:
def get_user_name(automation_probability: float):
    """Pick the user name recorded for one generated activity.

    One uniform draw from ``random.random()`` decides the outcome: if the
    draw is less than or equal to ``automation_probability`` the activity is
    attributed to ``'BATCH_JOB'``; otherwise a random key of
    ``values.om_users`` is returned.

    Args:
        automation_probability: Threshold compared against the uniform draw.

    Returns:
        ``'BATCH_JOB'`` or one key of ``values.om_users``.
    """
    automated = random.random() <= automation_probability
    if not automated:
        return random.choice(list(values.om_users.keys()))
    return 'BATCH_JOB'

def get_time_consumption(start_date, planned_target_date, latest_date):
    """Return the elapsed share of the planned duration.

    Computes ``(latest_date - start_date) / (planned_target_date - start_date)``.
    No guard is applied: when ``planned_target_date`` equals ``start_date``
    the divisor is zero and the division error propagates to the caller.
    """
    elapsed = latest_date - start_date
    planned = planned_target_date - start_date
    return elapsed / planned

In [5]:
# Master-data extracts, loaded once; get_params() filters these frames.
MARC = pd.read_csv('data/P2P/OCPM/master/MARC.csv')
MARA = pd.read_csv('data/P2P/OCPM/master/MARA.csv')
MAKT = pd.read_csv('data/P2P/OCPM/master/MAKT.csv')
LFB1 = pd.read_csv('data/P2P/OCPM/master/LFB1.csv')
LFA1 = pd.read_csv('data/P2P/OCPM/master/LFA1.csv')

# Per-material lookups keyed by MATNR; get_params() reads them for every
# selected material.
all_prices = {}
all_has_contract_probabilities = {}
all_matnr_names = {}
all_availabilities = {}

# Index the configured materials by name once instead of walking the whole
# group tree for every MATNR. Later groups overwrite earlier ones, so a name
# that occurs in several groups resolves to its last occurrence.
material_catalogue = {}
for group_collection in values.om_material_groups.values():
    for group_attributes in group_collection.values():
        for material_name, material_details in group_attributes['materials'].items():
            material_catalogue[material_name] = (material_name, material_details, group_attributes)

# MATNR -> MAKTX, keeping the first MAKT row of each material number. This
# replaces one full boolean scan of MAKT per material.
maktx_by_matnr = {}
for matnr, maktx in zip(MAKT['MATNR'], MAKT['MAKTX']):
    maktx_by_matnr.setdefault(matnr, maktx)

# Resolve price, contract probability and availability for every MATNR.
for nr in MARA['MATNR']:
    # Fails fast (KeyError) when a material has no description row in MAKT.
    name = maktx_by_matnr[nr]
    if name not in material_catalogue:
        # Descriptions that are not configured in values.om_material_groups
        # get no lookup entries.
        continue
    material_name, material_details, group_attributes = material_catalogue[name]
    all_prices[nr] = material_details['price']
    all_matnr_names[nr] = material_name
    all_has_contract_probabilities[nr] = group_attributes['has_contract_probability']
    all_availabilities[nr] = group_attributes['availability']

def get_params():
    """Draw the randomised parameter set for one simulated purchasing case.

    Picks a company code, one of its plants, a vendor registered for that
    company code in LFB1, and between 5 and 25 distinct materials listed for
    the plant in MARC (fewer when the plant has fewer). The per-item lists
    ``quantities``, ``prices``, ``item_has_contract`` and ``delivery_status``
    are aligned index-by-index with ``matnrs``.

    Returns:
        dict: with the keys
            ``matnrs``, ``quantities``, ``prices`` - selected materials and
                their per-item values;
            ``konnr`` - last 15 characters of a fresh UUID4 string;
            ``lifnr``, ``new_vendor`` - two independent vendor draws for the
                company code (they may coincide);
            ``plant``, ``company_code``, ``purchasing_org``, ``payment_term``,
                ``requested_by``;
            ``item_has_contract``, ``has_contract`` - per-item flags and
                whether every item has a contract;
            ``has_pr_price_mismatch``, ``pr_prices`` - fixed placeholders
                (``False`` / ``[]``);
            ``is_free_text``, ``free_text_materials``;
            ``has_qty_change``, ``is_delivered_late``, ``is_delivered_early``;
            ``delivery_status`` - one ``{'status', 'prob'}`` dict per item;
            ``new_payment_term`` - always ``'Z090'``.

    Raises:
        IndexError: if LFB1 has no vendor for the company code, no ZTERM row
            for the vendor, or LFA1 has no row for the vendor.
        KeyError: if a material is missing from the module-level lookup
            dictionaries or a configuration key is absent.
    """
    company_code = random.choice(list(values.om_company_codes.keys()))
    company = values.om_company_codes[company_code]
    plant = random.choice(company['plants'])
    konnr = str(uuid.uuid4())[-15:]
    company_vendors = list(LFB1[LFB1['BUKRS'] == company_code]['LIFNR'])
    lifnr = random.choice(company_vendors)
    vendor_name = LFA1[LFA1['LIFNR'] == lifnr]['NAME1'].values[0]

    all_matnrs = MARC[MARC['WERKS'] == plant]['MATNR'].unique()
    matnrs = random.sample(list(all_matnrs), min(random.randint(5, 25), len(all_matnrs)))
    quantities = [random.randint(12, 60) * 12 for _ in matnrs]  # whole dozens of every item

    prices = [all_prices[matnr] for matnr in matnrs]
    has_contract_probabilities = [all_has_contract_probabilities[matnr] for matnr in matnrs]

    # ----------------------------------------------------------------------
    # Logic by Tim von Luecken
    # With availability a, one uniform draw r per item is split into three bands:
    #   late    : r < 0.7 - 0.65*a               (probability 0.7 - 0.65*a)
    #   on time : 0.7 - 0.65*a <= r < 0.95 - 0.1*a  (probability 0.25 + 0.55*a)
    #   early   : r >= 0.95 - 0.1*a              (probability 0.05 + 0.1*a)
    # 'prob' stores how far the draw lies above the lower edge of its band.
    item_delivery_status = []
    for matnr in matnrs:
        availability = all_availabilities[matnr]
        late_bound = 0.7 - 0.65 * availability
        ot_bound = 0.95 - 0.1 * availability
        r = random.random()
        if r < late_bound:
            item_delivery_status.append({'status': 'late', 'prob': r})
        elif r < ot_bound:
            item_delivery_status.append({'status': 'ot', 'prob': r - late_bound})
        else:
            item_delivery_status.append({'status': 'early', 'prob': r - ot_bound})
    # ----------------------------------------------------------------------

    item_has_contract = [bool(random.random() < probability) for probability in has_contract_probabilities]

    # The case counts as contract-backed only when every item has a contract.
    has_contract = all(item_has_contract)

    # PR price mismatch is not applied in the Contract Usage use case, so the
    # returned dict carries fixed placeholders. The earlier draft inflated each
    # price by a factor of 1 + min(0.5, random.random()); reinstate that if it
    # is needed for Contract Leakage.

    requested_by = get_user_name(0.1)

    purchasing_org = random.choice(list(values.proc_purchasing_orgs.keys()))
    new_lifnr = random.choice(company_vendors)
    payment_term = random.choice(list(LFB1[LFB1['LIFNR'] == lifnr]['ZTERM']))

    is_free_text = bool(random.random() < company['free_text_pr_probability'])
    # NOTE(review): this list covers every material of the plant (all_matnrs),
    # not only the selected matnrs, so it is not index-aligned with the other
    # per-item lists - confirm how the consumer indexes it.
    free_text_materials = [all_matnr_names[matnr] for matnr in all_matnrs]  # TODO change with material description

    # NOTE(review): these three flags threshold the configured rate at 0.5
    # rather than sampling it, so they are constant per company code / vendor.
    # Confirm that this is intended.
    has_qty_change = bool(company['incorrect_qty_prbobability'] > 0.5)
    vendor_profile = values.proc_vendors[vendor_name]
    is_delivered_late = bool(vendor_profile['late_delivery_rate'] > 0.5)
    is_delivered_early = bool(vendor_profile['early_delivery_rate'] > 0.5)

    params = {
        'matnrs': matnrs,
        'konnr': konnr,
        'lifnr': lifnr,
        'plant': plant,
        'quantities': quantities,
        'prices': prices,  # considered contract and PO prices
        'company_code': company_code,
        'purchasing_org': purchasing_org,
        'payment_term': payment_term,
        'requested_by': requested_by,
        'item_has_contract': item_has_contract,
        'has_contract': has_contract,
        'has_pr_price_mismatch': False,  # not applied in Contract Usage use case - use if needed for Contract Leakage
        'pr_prices': [],  # not applied in Contract Usage use case - use if needed for Contract Leakage
        'is_free_text': is_free_text,
        'free_text_materials': free_text_materials,
        'has_qty_change': has_qty_change,
        'is_delivered_late': is_delivered_late,
        'is_delivered_early': is_delivered_early,
        'delivery_status': item_delivery_status,

        'new_payment_term': 'Z090',
        'new_vendor': new_lifnr,
    }

    return params

In [7]:
# get_params()

In [8]:
# x = []
# for i in range(10_000):
#     # x.append(get_params()['is_free_text'])
#     x += get_params()['item_has_contract']
# y = [i for i in x if i == True]

In [9]:
# len(y)/len(x)

In [10]:
# np.eye(9).round(2)

In [11]:
# Placeholder 9x9 transition matrix: the identity, i.e. every step maps to
# itself with weight 1.
transition_prob = np.eye(9)

In [12]:
# Accumulator for the generated rows: one independent, initially empty dict
# per '<TABLE>_json' key, filled by the simulation loop.
purchasing_doc_tables = {
    table_key: {}
    for table_key in (
        'EBAN_json',
        'CDHDR_json',
        'CDPOS_json',
        'EKKO_json',
        'EKPO_json',
        'NAST_json',
        'MSEG_json',
        'EKBE_json',
        'RBKP_json',
        'RSEG_json',
        'EKET_json',
    )
}
# Simulate 1,000 purchasing cases. Each case creates a contract and a purchase
# requisition, then walks a 9-state process (see the step legend below) until
# it reaches the Terminate state, and finally merges the rows it produced into
# purchasing_doc_tables.
for i in range(1_000):
	# Row s holds the relative weights of moving from step s to every step.
	# Rows need not sum to 1 (row 0 sums to 1.02): the matrix is normalised
	# row-wise before each draw at the bottom of the while loop.
	# The matrix is rebuilt for every case because the loop body overwrites
	# individual entries (rows 1 and 2) depending on the case parameters.
	transition_prob = np.array([
		[0.00, 1.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.02], # Approve PO
		[0.00, 0.00, 1.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00], # Create PO
		[0.00, 0.00, 0.00, 1.00, 0.00, 0.00, 0.00, 0.00, 0.00], # Send PO
		[0.00, 0.00, 0.00, 0.00, 1.00, 0.00, 0.00, 0.00, 0.00], # Post Goods Receipt
		[0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 1.00], # Create Vendor Invoice

		[0.00, 0.00, 0.00, 0.95, 0.00, 0.00, 0.00, 0.00, 0.05], # Change Payment Term
		[0.85, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.15], # Change Vendor
		[0.00, 0.00, 0.95, 0.00, 0.00, 0.00, 0.00, 0.00, 0.05], # Change Quantity

		[0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 1.00] # Terminate
	])
	# The bare string below is a no-op expression statement used as a legend
	# for the step indices.
	"""
	0: Approve PO
	1: Create PO
	2: Send PO
	3: Post GR
	4: Create Vendor Invoice

	5: Change Payment Term
	6: Change Vendor
	7: Change Quantity
	
	8: Terminate
	"""

	# Create Contract
	# Create PR
	
	# Draw the case parameters and a random start date between 2021-01-01 and
	# 2023-09-01. latest_date is the running clock of the case and is advanced
	# after most activities; latest_time is drawn once and only passed to the
	# goods receipt.
	params = get_params()
	latest_date = helpers.generate_random_date(start_date=datetime(2021, 1, 1), end_date=datetime(2023, 9, 1))
	latest_time = helpers.generate_random_time()
	purchasing = purchasing_doc_data.Purchasing(params=params, start_date=latest_date, index=i)
	preq_creation_date = latest_date

	# Contract and purchase requisition are both dated on the start date.
	purchasing.create_contract(
		aedat=latest_date,
		ernam=get_user_name(0.3)
	)

	# Free-text requisitions are automated less often (0.2) than others (0.7).
	pr_automation_rate = 0.2 if params['is_free_text'] else 0.7
	purchasing.create_purchase_requisition_item(
		badat=preq_creation_date, 
		ernam=get_user_name(pr_automation_rate),
	)

	# Choose the entry step: cases with a contract for every item and no free
	# text go straight to Create PO; all others start at Approve PO, or with
	# 30% chance at Change Vendor.
	step = 1 # Create PO
	if (params['is_free_text']) or (not params['has_contract']):
		step = 0 # Approve PO
		# possibly lead to Vendor Change
		if random.random() < 0.3:
			step = 6
			# presumably a random duration of up to one week - confirm in helpers
			latest_date += helpers.UPTO_WEEK()
	else:
		latest_date += helpers.UPTO_DAY()

	# Walk the process until the Terminate state (8) is drawn.
	while step != 8:
		if step == 0:
			purchasing.approve_purchase_order(
				aedat=latest_date,
				ernam = get_user_name(0.2)
			)
			latest_date += helpers.UPTO_WEEK()

		elif step == 1:
			purchasing.create_purchase_order(
				aedat=latest_date,
				ernam=get_user_name(0.8)
			)
			latest_date += helpers.UPTO_DAY()

			# Adding weight 1.0 next to the existing 1.0 for Send PO makes
			# Change Quantity a 50% branch once the row is normalised.
			if params['has_qty_change']: # 50% chance of Change Quantity
				transition_prob[step][7] = 1.0

		elif step == 2:
			purchasing.send_purchase_order(
				usnam=get_user_name(0.9),
				erdat=latest_date
			)
			latest_date += helpers.UPTO_DAY()

			# Same mechanism as above: Change Payment Term becomes a 50%
			# branch next to Post GR.
			if params['is_delivered_late'] or params['is_delivered_early']:
				transition_prob[step][5] = 1.0

		elif step == 3:
			purchasing.post_goods_receipt(
				cpudt=latest_date,
				usnam=get_user_name(0.7),
				atime=latest_time
			)
			latest_date += helpers.UPTO_WEEK()

		elif step == 4:
			# NOTE(review): the keyword here is spelled 'cupdt' while the goods
			# receipt uses 'cpudt' - confirm it matches the method signature.
			# The clock is not advanced here; row 4 always leads to Terminate.
			purchasing.create_vendor_invoice(
				cupdt=latest_date,
				ernam=get_user_name(0.9),
			)

		elif step == 5:
			purchasing.change_payment_term( # likely goes next to 'Post GR'
				udate=latest_date,
				ernam=get_user_name(0.2)
			)
			latest_date += helpers.UPTO_MONTH()

		elif step == 6:
			purchasing.change_vendor( # likely goes to 'Approve PO'
				udate=latest_date,
				ernam=get_user_name(0.2)
			)
			latest_date += helpers.UPTO_WEEK()

		elif step == 7:
			# Pick a random subset of item indices (possibly empty, because the
			# lower bound of randint is 0) and scale each selected quantity by
			# a factor in [1, 2); the new quantities are floats.
			old_quantities = params['quantities']
			new_quantity_lines = random.sample(range(len(old_quantities)), random.randint(0, len(old_quantities)))
			new_quanity_quantities = [old_quantities[j]*(1+random.random()) for j in new_quantity_lines]
			purchasing.change_quantity( # likely goes to 'Send PO'
				badat=latest_date,
				ernam=get_user_name(0.1),
				line_numbers=new_quantity_lines,
				line_quantities=new_quanity_quantities
			)
			latest_date += helpers.UPTO_WEEK()
		
		# normalize transition matrix (every row is divided by its own sum),
		# then draw the next step from the row of the current step
		transition_prob = transition_prob / transition_prob.sum(axis=1, keepdims=True)
		step = np.random.choice(9, p=transition_prob[step])

	# Merge the rows of this case into the global accumulator; an entry key
	# that already exists in the accumulator is overwritten.
	for k, v in purchasing.tables.items():
		for entry_key in list(v.keys()):
			purchasing_doc_tables[k][entry_key] = purchasing.tables[k][entry_key]

In [12]:
# Write one CSV per generated table. An empty frame carrying the column names
# from ctl.fetch_table() is concatenated in front of the generated rows, so
# those columns come first in the file.
for json_key, records in purchasing_doc_tables.items():
    sap_table = json_key.split('_')[0]
    column_names = [column[0] for column in ctl.fetch_table(sap_table)]
    header_frame = pd.DataFrame(columns=column_names)
    record_frame = pd.DataFrame(records.values())
    combined = pd.concat([header_frame, record_frame])
    combined.to_csv(f'data/P2P/OCPM/purchasing-document/{sap_table}.csv', index=False)