## Analytic 41 Code

#### OPIM5770 | Fall 2018 | Team 4

###### This notebook contains the code that generates the base file required as input to Analytic 41. Designed by Team 4.

In [1]:
# Import required modules
import pandas as pd
import numpy as np
import os

In [2]:
# Load the BSAK_BKPF extract (note: with date parsing enabled, this may take some time to complete).

# Single source of truth for the columns to read and the dtype each one is read as.
# `usecols` is derived from this mapping so the two lists cannot drift apart
# (each source column appears exactly once).
BSAK_BKPF_DTYPES = {
    'Company_Code': str,
    'Account_Number_of_Vendor_or_Creditor': str,
    'Reference_Document_Number': str,
    'Accounting_Document_Number': str,
    'Document_Type': str,
    'Number_of_Line_Item_Within_Accounting_Document': int,
    'Document_Number_of_the_Clearing_Document': str,
    'Fiscal_Year': int,
    'Document_Date_in_Document': str,                      # converted via parse_dates
    'Clearing_Date': str,                                  # converted via parse_dates
    'Day_On_Which_Accounting_Document_Was_Entered': str,   # converted via parse_dates
    'Debit_Credit_Indicator': str,
    'Currency_Key': str,
    'Amount_in_Document_Currency': float,
    'Amount_in_Local_Currency': float,
    'Reverse_Document_Number': str,
}

# Columns that read_csv should convert to datetime.
parse_dates = [
    'Document_Date_in_Document',
    'Clearing_Date',
    'Day_On_Which_Accounting_Document_Was_Entered',
]

# The file is read as pipe-delimited, single-quote-quoted, latin1-encoded text.
BSAK_BKPF_DF = pd.read_csv(
    r'./../../src/BSAK_BKPF.csv',
    sep="|",
    quotechar="'",
    low_memory=False,
    encoding='latin1',
    usecols=list(BSAK_BKPF_DTYPES),
    dtype=BSAK_BKPF_DTYPES,
    parse_dates=parse_dates,
)

# Rename the source columns to the upper-case names used in the output file.
# Assignment (rather than inplace=True) matches how T001_DF is renamed below.
BSAK_BKPF_DF = BSAK_BKPF_DF.rename(columns={
    'Company_Code': 'COMPANY_CODE',
    'Account_Number_of_Vendor_or_Creditor': 'VENDOR_ID',
    # This key used to be listed twice; in a dict literal only the last entry
    # takes effect, so 'REFERENCE_DOC_NBR' is the name the column has always
    # received and is the one kept here.
    'Reference_Document_Number': 'REFERENCE_DOC_NBR',
    'Accounting_Document_Number': 'ACCOUNTING_DOC_NUM',
    'Document_Type': 'DOCUMENT_TYPE',
    'Number_of_Line_Item_Within_Accounting_Document': 'LINE_ITEM_ACCT_DOC_NUM',
    'Document_Number_of_the_Clearing_Document': 'CLEARING_DOC_NUM',
    'Fiscal_Year': 'FISCAL_YEAR',
    'Document_Date_in_Document': 'DOCUMENT_DATE',
    'Clearing_Date': 'CLEARING_DATE',
    'Day_On_Which_Accounting_Document_Was_Entered': 'DAY_DOC_ENTERED',
    'Debit_Credit_Indicator': 'DR_CR_INDICATOR',
    'Currency_Key': 'DOC_CURRENCY_INDICATOR',
    # Fixed: the target name contained a run of embedded spaces after 'INVOICE_'.
    # NOTE(review): this changes the header written to the output CSV — confirm
    # nothing downstream relies on the old, space-containing name.
    'Amount_in_Document_Currency': 'INVOICE_AMT_DOC_CURRENCY',
    'Amount_in_Local_Currency': 'INVOICE_AMT_REPORT_CURRENCY',
    'Reverse_Document_Number': 'REVERSE_DOC_NBR',
})

In [3]:
# Keep only the rows whose document type is neither 'ZP' nor 'KZ'
BSAK_BKPF_DF = BSAK_BKPF_DF.loc[~BSAK_BKPF_DF['DOCUMENT_TYPE'].isin(['ZP', 'KZ'])]

In [4]:
# Load the T001 dataframe (company code, company name and currency key).
# NOTE(review): unlike the BSAK_BKPF load, no sep/quotechar/encoding is passed here,
# so the file is parsed with the pandas defaults — confirm T001.csv is comma-delimited.
T001_DF = pd.read_csv(r'./../../src/T001.csv',
                      usecols=['Company_Code',
                               'Name_of_Company_Code_or_Company',
                               'Currency_Key'],
                      dtype={
                          # Key must match the column name exactly ('Company_Code',
                          # with an underscore); otherwise the str dtype is never
                          # applied to the column later used as the join key.
                          'Company_Code': str,
                          'Name_of_Company_Code_or_Company': str,
                          'Currency_Key': str,
                      })

# Rename to the upper-case output names; COMPANY_CODE is the key shared with BSAK_BKPF_DF.
T001_DF = T001_DF.rename(columns={
    'Company_Code': 'COMPANY_CODE',
    'Name_of_Company_Code_or_Company': 'COMPANY_NAME',
    'Currency_Key': 'REPORT_CURRENCY_INDICATOR',
})

In [5]:
# Left-join the T001 company attributes onto every BSAK_BKPF row via COMPANY_CODE
joinDF = BSAK_BKPF_DF.merge(T001_DF, how='left', on='COMPANY_CODE')

In [6]:
# Computed field: weekday number of the date the document was entered (Monday = 0)
joinDF['DAY_OF_WEEK'] = joinDF['DAY_DOC_ENTERED'].dt.dayofweek

In [7]:
# Display the first five rows as a sanity check of the joined table
joinDF.head(n=5)

Unnamed: 0,COMPANY_CODE,VENDOR_ID,CLEARING_DATE,CLEARING_DOC_NUM,FISCAL_YEAR,ACCOUNTING_DOC_NUM,LINE_ITEM_ACCT_DOC_NUM,DOCUMENT_DATE,DAY_DOC_ENTERED,DOC_CURRENCY_INDICATOR,REFERENCE_DOC_NBR,DOCUMENT_TYPE,DR_CR_INDICATOR,INVOICE_AMT_REPORT_CURRENCY,INVOICE_ AMT_DOC_CURRENCY,REVERSE_DOC_NBR,COMPANY_NAME,REPORT_CURRENCY_INDICATOR,DAY_OF_WEEK
0,1001,20660,2018-02-17,2900443156,2018,1900009335,1,2018-01-31,2018-02-16,USD,013118CDV,KR,H,15826.25,15826.25,,CDIY US INC,USD,4
1,1001,20660,2018-04-04,2900453409,2018,1900019966,1,2018-03-04,2018-04-03,USD,040318TRAV,KR,H,30904.38,30904.38,,CDIY US INC,USD,1
2,1001,20660,2018-03-10,2900447675,2018,1900012919,1,2018-02-28,2018-03-05,USD,022818CDV,KR,H,20920.0,20920.0,,CDIY US INC,USD,0
3,1001,20660,2018-05-22,2900463376,2018,1900029522,1,2018-05-18,2018-05-21,USD,051818TRAV,KR,H,36450.77,36450.77,,CDIY US INC,USD,0
4,1001,20660,2018-04-05,2900453546,2018,1900018246,1,2018-03-26,2018-03-26,USD,032618TRAV,KR,H,2866.93,2866.93,,CDIY US INC,USD,0


In [8]:
# Write out the result to CSV (without the DataFrame index).
# The output folder is created first if it is missing, so the write cannot fail
# on a missing directory after the slow load/join steps have already run.
output_path = r'./../output/A41_Base.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
joinDF.to_csv(output_path, index=False)