In [158]:
import os
import pandas as pd
import json
from datetime import datetime
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Load the cleaned JSON inputs; every file is read as line-delimited JSON.
def _read_json_lines(path):
    """Return the JSON-lines file at `path` as a DataFrame."""
    return pd.read_json(path, lines=True)


# Columns retained from the representatives and customer files.
_REP_COLUMNS = ['REP_CODE', 'COMM_METHOD', 'COMMISSION', 'REP_DESC_CLEAN', 'REP_GROUP']
_CUSTOMER_COLUMNS = [
    'CUSTOMER_NUMBER', 'CCAT_CODE', 'REGION_CODE', 'REP_CODE',
    'SETTLE_TERMS', 'NORMAL_PAYTERMS', 'DISCOUNT', 'CREDIT_LIMIT'
]

params_df = _read_json_lines("json_outputs/customer_params_df_clean.json")
categories_df = _read_json_lines("json_outputs/customer_categories_df_clean.json")
regions_df = _read_json_lines("json_outputs/customer_regions_df_clean.json")
payments_df = _read_json_lines("json_outputs/payment_lines_clean.json")
rep_df = _read_json_lines("json_outputs/representatives_clean.json")[_REP_COLUMNS]
# NOTE(review): customer_df is read again, without this column filter, by the
# later "Load customer master" cell, so this assignment is overwritten there.
customer_df = _read_json_lines("json_outputs/customer_df_clean.json")[_CUSTOMER_COLUMNS]

In [159]:
# Output directories, resolved against the current working directory.
csv_folder, json_folder = (
    os.path.join(os.getcwd(), folder_name)
    for folder_name in ("csv_outputs", "json_outputs")
)

In [160]:
# Load customer master (fact table) fresh from disk, then left-join the
# representative attributes onto it by REP_CODE so every customer row is kept.
customer_df = pd.merge(
    pd.read_json("json_outputs/customer_df_clean.json", lines=True),
    rep_df,
    how="left",
    on="REP_CODE",
)

In [161]:
# Merge core customer data: attach parameters by customer number, then the
# region and category lookups by their codes. All three are left joins.
# NOTE(review): a left join repeats a customer row once per matching
# params_df row -- confirm params_df has at most one row per CUSTOMER_NUMBER.
merged_df = (
    customer_df
    .merge(
        params_df.loc[:, ['CUSTOMER_NUMBER', 'PARAMETER', 'PARAMETER_GROUP']],
        how="left",
        on="CUSTOMER_NUMBER",
    )
    .merge(regions_df, how="left", on="REGION_CODE")
    .merge(categories_df, how="left", on="CCAT_CODE")
)

In [162]:
# Check the (rows, columns) count of the merged frame.
merged_df.shape

(2762, 18)

In [163]:
# List the column names of the merged frame.
list(merged_df.columns)

['CUSTOMER_NUMBER',
 'CCAT_CODE',
 'REGION_CODE',
 'REP_CODE',
 'SETTLE_TERMS',
 'NORMAL_PAYTERMS',
 'DISCOUNT',
 'CREDIT_LIMIT',
 'COMM_METHOD',
 'COMMISSION',
 'REP_DESC_CLEAN',
 'REP_GROUP',
 'PARAMETER',
 'PARAMETER_GROUP',
 'REGION_DESC',
 'PROVINCE',
 'CCAT_DESC',
 'CCAT_GROUP']

In [164]:
# Drop the PARAMETER_GROUP column. The result is rebound rather than mutated
# with inplace=True, so the step stays chain-friendly; errors="ignore" keeps
# the cell safe to re-run once the column is already gone.
merged_df = merged_df.drop(columns=["PARAMETER_GROUP"], errors="ignore")

In [165]:
# Re-check the (rows, columns) count after dropping PARAMETER_GROUP.
merged_df.shape

(2762, 17)

In [166]:
# List the remaining column names after the drop.
list(merged_df.columns)

['CUSTOMER_NUMBER',
 'CCAT_CODE',
 'REGION_CODE',
 'REP_CODE',
 'SETTLE_TERMS',
 'NORMAL_PAYTERMS',
 'DISCOUNT',
 'CREDIT_LIMIT',
 'COMM_METHOD',
 'COMMISSION',
 'REP_DESC_CLEAN',
 'REP_GROUP',
 'PARAMETER',
 'REGION_DESC',
 'PROVINCE',
 'CCAT_DESC',
 'CCAT_GROUP']

In [167]:
# Display the merged frame (the recorded output is pandas' truncated rendering).
merged_df

Unnamed: 0,CUSTOMER_NUMBER,CCAT_CODE,REGION_CODE,REP_CODE,SETTLE_TERMS,NORMAL_PAYTERMS,DISCOUNT,CREDIT_LIMIT,COMM_METHOD,COMMISSION,REP_DESC_CLEAN,REP_GROUP,PARAMETER,REGION_DESC,PROVINCE,CCAT_DESC,CCAT_GROUP
0,AACJ01,21,25b,ZZZ5,0,90,0,999999,,,,,Closed,,,,
1,AACJC1,21,25b,ZZZ5,0,120,0,999999,,,,,Closed,,,,
2,AACJC2,5,20a,CONS4,0,120,0,999999,Gross Profit,0.5,CONSIGNMENT,Channel: Consignment,Promotion,,,Consignment,Channel: Consignment
3,AADPRG,6,21a,XX,0,120,0,999999,Gross Profit,0.0,CONSIGNMENT,Channel: Consignment,,,,Advertising Appro,Internal: Advertising
4,AAMI01,41,10a,02,0,120,0,2000,Sales,0.5,R,Sales Rep,,Durban,KwaZulu-Natal,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2757,ZHAY02,19,4b,03,0,120,0,2000,Sales,0.5,BJ,Sales Rep,,Nelspruit / Tzaneen,Mpumalanga,,
2758,ZMAU01,37,11a,03,0,120,0,0,Sales,0.5,BJ,Sales Rep,,Free State / Lesotho,Free State,,
2759,ZNAE01,46,2b,05,0,120,0,30000,Sales,0.5,RL,Sales Rep,,Krugersdorp / Sun City,North West,,
2760,ZNAEOC,5,20a,STAND,0,120,0,999999,Gross Profit,0.0,CONSIGNMENT_STANDS,Channel: Consignment,Consignment,,,Consignment,Channel: Consignment


In [168]:
# Persist the merged customer table as CSV and as line-delimited JSON records.
# Create the output folders first so the writes do not fail when a folder
# (e.g. csv_outputs on a fresh run) does not exist yet.
os.makedirs(csv_folder, exist_ok=True)
os.makedirs(json_folder, exist_ok=True)

merged_df.to_csv(os.path.join(csv_folder, "customer_merged.csv"), index=False)
merged_df.to_json(os.path.join(json_folder, "customer_merged.json"), orient="records", lines=True)