In [284]:
import pandas as pd
import numpy as np

In [26]:
#CUSTOMER ORDER DATA PREPARATION FUNCTION
def customer_order_data_prep(filepath):
    """Load a tab-separated customer-orders report and return a prepared DataFrame.

    Processing steps:
      1. Normalise header names ('-' becomes '_', surrounding whitespace removed).
      2. Drop the columns that are not used further.
      3. Assign the canonical upper-case column names *positionally*.
      4. Parse the order timestamp into ORDER_DATETIME_DATETIME.
      5. Reorder the columns and replace missing values with 0.

    Parameters
    ----------
    filepath : str or path-like
        Location of the tab-separated report.

    Returns
    -------
    pandas.DataFrame
        23 columns, starting with AMAZON_ORDER_ID and ORDER_DATETIME_DATETIME.

    Raises
    ------
    KeyError
        If one of the columns to drop is missing from the report.
    ValueError
        If the number of remaining columns differs from the 23 expected names.
    """
    dropped_columns = [
        'last_updated_date',
        'order_channel',
        'url',
        'ship_city',
        'ship_state',
        'ship_postal_code',
        'ship_country',
        'purchase_order_number',
        'price_designation',
        'is_sold_by_ab',
    ]
    # Names are applied by position, so this list must follow the report's column order.
    renamed_columns = [
        'AMAZON_ORDER_ID',
        'MERCHANT_ORDER_ID',
        'ORDER_DATETIME_STRING',
        'ORDER_STATUS',
        'FULLFILLMENT_CHANNEL',
        'SALES_CHANNEL',
        'SHIPPING_SERVICE_LEVEL',
        'PRODUCT_NAME',
        'SKU',
        'ASIN',
        'ITEM_STATUS',
        'QUANTITY_ORDERED',
        'ORDER_CURRENCY',
        'ORDER_ITEM_PRICE',
        'ORDER_ITEM_TAX',
        'ORDER_SHIPPING_PRICE',
        'ORDER_SHIPPING_TAX',
        'ORDER_GIFT_WRAP_PRICE',
        'ORDER_GIFT_WRAP_TAX',
        'ORDER_ITEM_DISCOUNT',
        'ORDER_SHIPPING_DISCOUNT',
        'ORDER_PROMOTION_ID',
        'BUSINESS_CUSTOMER',
    ]

    df = pd.read_csv(filepath, sep='\t')

    # Stripping whitespace lets the drop below work whether or not the header
    # carries a trailing space (the previous code required 'is_sold_by_ab ').
    df = df.rename(columns=lambda col: col.replace('-', '_').strip())

    # `columns=` instead of a positional axis argument: pandas 2.0 made every
    # argument of DataFrame.drop except `labels` keyword-only.
    df = df.drop(columns=dropped_columns)
    df.columns = renamed_columns

    # Converting the order timestamp into datetime objects
    df['ORDER_DATETIME_DATETIME'] = pd.to_datetime(df['ORDER_DATETIME_STRING'])

    # The parsed timestamp goes second; the raw string column is left out.
    final_order = ['AMAZON_ORDER_ID', 'ORDER_DATETIME_DATETIME'] + [
        col for col in renamed_columns
        if col not in ('AMAZON_ORDER_ID', 'ORDER_DATETIME_STRING')
    ]

    # Non-inplace fillna avoids mutating a column-selected slice.
    return df[final_order].fillna(0)

In [27]:
#TEST for CUSTOMER ORDER DATA PREPARATION FUNCTION
# Smoke test: prepare one customer-orders report and display its first two rows.
# NOTE(review): the path is an absolute location on the author's machine.
test_file_path = "/Users/tm/Downloads/02_Monthly_Data/Customer_Orders/Customer_Orders_M-2018-06.txt"
customer_order_data_prep(test_file_path).head(2)

Unnamed: 0,AMAZON_ORDER_ID,ORDER_DATETIME_DATETIME,MERCHANT_ORDER_ID,ORDER_STATUS,FULLFILLMENT_CHANNEL,SALES_CHANNEL,SHIPPING_SERVICE_LEVEL,PRODUCT_NAME,SKU,ASIN,...,ORDER_ITEM_PRICE,ORDER_ITEM_TAX,ORDER_SHIPPING_PRICE,ORDER_SHIPPING_TAX,ORDER_GIFT_WRAP_PRICE,ORDER_GIFT_WRAP_TAX,ORDER_ITEM_DISCOUNT,ORDER_SHIPPING_DISCOUNT,ORDER_PROMOTION_ID,BUSINESS_CUSTOMER
0,403-6279650-0558719,2018-06-27 09:33:27+00:00,403-6279650-0558719,Shipped,Amazon,Amazon.fr,Expedited,weltenmann Espadrilles Slip-on Classiques pour...,WE-01-02-01-02-01-09-44,B07DC9K4KV,...,27.48,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,False
1,408-9564735-8030741,2018-06-24 20:47:46+00:00,408-9564735-8030741,Shipped,Amazon,Amazon.fr,Expedited,weltenmann Espadrilles Slip-on Classiques pour...,WE-01-02-01-01-01-05-44,B07DFQN5H1,...,27.48,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,False


In [28]:
# formatting: year / year_iso / quarter / month / week_iso
def transform_datetime_to_any_dateformat(df, column_name, formatting):
    """Render a datetime column of `df` as a period label.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the datetime column.
    column_name : str
        Name of a column that supports the `.dt` accessor.
    formatting : str
        One of 'year', 'year_iso', 'quarter', 'month', 'week_iso'.

    Returns
    -------
    pandas.Series or None
        Strings for every format except 'quarter', which yields the integer
        quarter number. For an unknown `formatting` a message is printed and
        None is returned.
    """
    # strftime-based formats:
    #   %G - 4-digit year belonging to the ISO week number
    #   %V - ISO 8601 week number (01 to 53), weeks start on Monday and week 1
    #        is the first week with at least 4 days in the year
    strftime_patterns = (
        ('year', '%Y'),
        ('year_iso', '%G'),
        ('month', '%Y.%m'),
        ('week_iso', '%G.%V'),
    )

    if formatting == 'quarter':
        return df[column_name].dt.quarter

    for label, pattern in strftime_patterns:
        if formatting == label:
            return df[column_name].dt.strftime(pattern)

    print("Value for 'formatting' invalid.")
    return None


In [29]:
#TEST for 'transform_datetime_to_any_dateformat' FUNCTION
# Prepares one customer-orders report and formats its order timestamp as ISO year.week.
# NOTE(review): assigns the notebook-global names `df`, `column_name` and `formatting`;
# the path is an absolute location on the author's machine.
test_file_path = '/Users/tm/Downloads/02_Monthly_Data/Customer_Orders/Customer_Orders_M-2018-06.txt'
df = customer_order_data_prep(test_file_path)
column_name = 'ORDER_DATETIME_DATETIME'
formatting = 'week_iso'

transform_datetime_to_any_dateformat(df, column_name, formatting)

0     2018.26
1     2018.25
2     2018.25
3     2018.25
4     2018.26
       ...   
56    2018.26
57    2018.26
58    2018.26
59    2018.26
60    2018.25
Name: ORDER_DATETIME_DATETIME, Length: 61, dtype: object

In [6]:
#CUSTOMER SHIPMENTS Data Preparation
def customer_shipments_data_prep(filepath):
    """Load a tab-separated customer-shipments report and return a prepared DataFrame.

    Processing steps:
      1. Normalise header names ('-' becomes '_', upper-cased).
      2. Drop the columns that are not used further.
      3. Assign the canonical column names *positionally*.
      4. For each of the five timestamp columns add a parsed
         `<PREFIX>_DATETIME_DATETIME` column and a `<PREFIX>_DATE` column, then
         leave the raw string column out of the result.
      5. Replace missing values with 0.

    Parameters
    ----------
    filepath : str or path-like
        Location of the tab-separated report.

    Returns
    -------
    pandas.DataFrame
        47 columns; every timestamp pair sits where its raw string column was.

    Raises
    ------
    KeyError
        If one of the columns to drop is missing from the report.
    ValueError
        If the number of remaining columns differs from the 42 expected names.
    """
    dropped_columns = [
        'MERCHANT_ORDER_ID',
        'AMAZON_ORDER_ITEM_ID',
        'MERCHANT_ORDER_ITEM_ID',
        'BUYER_PHONE_NUMBER',
        'SHIP_SERVICE_LEVEL',
        'SHIP_PHONE_NUMBER',
    ]
    # Names are applied by position, so this list must follow the report's column order.
    renamed_columns = [
        'AMAZON_ORDER_ID',
        'SHIPMENT_ID',
        'SHIPMENT_ITEM_ID',
        'PURCHASE_DATETIME_STRING',
        'PAYMENTS_DATETIME_STRING',
        'SHIPMENT_DATETIME_STRING',
        'REPORTING_DATETIME_STRING',
        'BUYER_EMAIL',
        'BUYER_NAME',
        'SKU',
        'PRODUCT_NAME',
        'QUANTITY_SHIPPED',
        'CURRENCY',
        'ITEM_PRICE',
        'ITEM_TAX',
        'SHIPPING_PRICE',
        'SHIPPING_TAX',
        'GIFT_WRAP_PRICE',
        'GIFT_WRAP_TAX',
        'RECIPIENT_NAME',
        'SHIPPING_ADDRESS_1',
        'SHIPPING_ADDRESS_2',
        'SHIPPING_ADDRESS_3',
        'SHIPPING_CITY',
        'SHIPPING_STATE',
        'SHIPPING_POSTAL_CODE',
        'SHIPPING_COUNTRY',
        'BILLING_ADDRESS_1',
        'BILLING_ADDRESS_2',
        'BILLING_ADDRESS_3',
        'BILLING_CITY',
        'BILLING_STATE',
        'BILLING_POSTAL_CODE',
        'BILLING_COUNTRY',
        'ITEM_DISCOUNT',
        'SHIP_DISCOUNT',
        'CARRIER',
        'TRACKING_NUMBER',
        'ARRIVAL_DATETIME_STRING',
        'FULFILLMENT_CENTER_ID',
        'FULFILLMENT_CHANNEL',
        'SALES_CHANNEL',
    ]
    string_suffix = '_DATETIME_STRING'

    # Read the file named by the argument. The previous code read the
    # notebook-global `file_path`, so the parameter was silently ignored.
    df = pd.read_csv(filepath, sep='\t')
    df = df.rename(columns=lambda col: col.replace('-', '_').upper())

    # `columns=` instead of a positional axis argument: pandas 2.0 made every
    # argument of DataFrame.drop except `labels` keyword-only.
    df = df.drop(columns=dropped_columns)
    df.columns = renamed_columns

    # Build the output order while parsing: each raw string column is replaced
    # in place by its parsed datetime column followed by the date-only column.
    final_order = []
    for col in renamed_columns:
        if not col.endswith(string_suffix):
            final_order.append(col)
            continue
        prefix = col[:-len(string_suffix)]
        parsed = pd.to_datetime(df[col])
        df[prefix + '_DATETIME_DATETIME'] = parsed
        df[prefix + '_DATE'] = parsed.dt.date
        final_order.extend([prefix + '_DATETIME_DATETIME', prefix + '_DATE'])

    # Non-inplace fillna avoids mutating a column-selected slice.
    return df[final_order].fillna(0)

In [7]:
#TEST CUSTOMER SHIPMENTS Data Preparation Function
# Smoke test: prepare one customer-shipments report and display its first two rows.
# NOTE(review): this cell binds the notebook-global `file_path` (other cells use `filepath`);
# the create_dictionary call at the end of the notebook reads this global as well.
file_path = '/Users/tm/Downloads/02_Monthly_Data/Customer_Shipments/Customer Shipments_M-2019-12.txt'
customer_shipments_data_prep(file_path).head(2)

Unnamed: 0,AMAZON_ORDER_ID,SHIPMENT_ID,SHIPMENT_ITEM_ID,PURCHASE_DATETIME_DATETIME,PURCHASE_DATE,PAYMENTS_DATETIME_DATETIME,PAYMENTS_DATE,SHIPMENT_DATETIME_DATETIME,SHIPMENT_DATE,REPORTING_DATETIME_DATETIME,...,BILLING_COUNTRY,ITEM_DISCOUNT,SHIP_DISCOUNT,CARRIER,TRACKING_NUMBER,ARRIVAL_DATETIME_DATETIME,ARRIVAL_DATE,FULFILLMENT_CENTER_ID,FULFILLMENT_CHANNEL,SALES_CHANNEL
0,302-5898945-7685925,DG8JLgmWc,DWMNPT8BR,2019-12-30 14:52:14+00:00,2019-12-30,2019-12-30 22:57:19+00:00,2019-12-30,2019-12-30 22:13:14+00:00,2019-12-30,2019-12-30 23:13:21+00:00,...,DE,0.0,0.0,AMZN_DE,AB0080738518,2019-12-31 19:00:00+00:00,2019-12-31,WRO2,AFN,Amazon.de
1,403-9309367-5311554,DG7hytmNc,D2Gb08ZsR,2019-12-26 11:25:34+00:00,2019-12-26,2019-12-28 13:21:46+00:00,2019-12-28,2019-12-28 12:26:12+00:00,2019-12-28,2019-12-28 13:26:19+00:00,...,IT,0.0,0.0,AMZN_IT,BA0055949236,2019-12-31 19:00:00+00:00,2019-12-31,PRG2,AFN,Amazon.it


In [8]:
#Fulfillment Center data
def fulfillment_center_data(filepath):
    """Load the fulfillment-center master data from an Excel workbook.

    Parameters
    ----------
    filepath : str or path-like
        Location of the workbook.

    Returns
    -------
    pandas.DataFrame
        Contents of the sheet pandas reads by default (the first one).
    """
    # The source is an Excel workbook, not a delimited text file. `sep` is not
    # a read_excel parameter and is rejected by current pandas versions.
    return pd.read_excel(filepath)

In [9]:
#TEST fulfillment_center_data Function
# Smoke test: load the workbook and display its first two rows.
# NOTE(review): rebinds the notebook-global `filepath`; absolute path on the author's machine.
filepath = '/Users/tm/Downloads/01 Master Data/Fulfillment Centers/Fulfillment Centers.xlsx'
fulfillment_center_data(filepath).head(2)

Unnamed: 0,FULFILLMENT_CENTER_ID,STORAGE_COUNTRY
0,BDEA,DE
1,BDEC,DE


In [285]:
#semimonthly_statements function
def semimonthly_statements(filepath):
    """Load a tab-separated semimonthly settlement statement.

    Header names are normalised ('-' becomes '_', upper-cased). A
    PROMOTION_ID column filled with NaN is appended when the statement does
    not already provide one.

    Parameters
    ----------
    filepath : str or path-like
        Location of the tab-separated statement.

    Returns
    -------
    pandas.DataFrame
        The statement rows, always containing a PROMOTION_ID column.
    """
    df = pd.read_csv(filepath, sep='\t')
    df = df.rename(columns=lambda col: col.replace('-', '_').upper())

    # Check the column labels, not the row index. The previous `in df.index`
    # test was always false for these files, so an existing PROMOTION_ID
    # column was overwritten with NaN.
    if 'PROMOTION_ID' not in df.columns:
        df['PROMOTION_ID'] = np.nan

    return df

In [286]:
#TEST semimonthly_statements Function
# Smoke test: load one settlement statement and display its first rows.
# NOTE(review): rebinds the notebook-globals `filepath` and `df`; absolute path on the author's machine.
filepath = '/Users/tm/Downloads/03 Variable Data/Semimonthly Statements/DE/DE_20180527 - 20180415_v2.txt'
df = semimonthly_statements(filepath)
df.head()

Unnamed: 0,SETTLEMENT_ID,SETTLEMENT_START_DATE,SETTLEMENT_END_DATE,DEPOSIT_DATE,TOTAL_AMOUNT,CURRENCY,TRANSACTION_TYPE,ORDER_ID,MERCHANT_ORDER_ID,ADJUSTMENT_ID,...,AMOUNT,FULFILLMENT_ID,POSTED_DATE,POSTED_DATE_TIME,ORDER_ITEM_CODE,MERCHANT_ORDER_ITEM_ID,MERCHANT_ADJUSTMENT_ITEM_ID,SKU,QUANTITY_PURCHASED,PROMOTION_ID
0,7276623832,15.04.2018 15:45:00 UTC,27.05.2018 15:48:03 UTC,29.05.2018 15:48:03 UTC,-4719.0,EUR,,,,,...,,,,,,,,,,
1,7276623832,,,,,,other-transaction,,,,...,-4719.0,,15.05.2018,15.05.2018 19:15:34 UTC,,,,,,


In [12]:
#VAT_country_rules_data
def VAT_country_rules_data(filepath):
    """Load the VAT country rules master data from an Excel workbook.

    Parameters
    ----------
    filepath : str or path-like
        Location of the workbook.

    Returns
    -------
    pandas.DataFrame
        Contents of the sheet pandas reads by default (the first one).
    """
    # The source is an Excel workbook, not a delimited text file. `sep` is not
    # a read_excel parameter and is rejected by current pandas versions.
    return pd.read_excel(filepath)

In [13]:
#TEST VAT_country_rules_data
# Smoke test: load the workbook and display its first two rows.
# NOTE(review): rebinds the notebook-global `filepath`; absolute path on the author's machine.
filepath = '/Users/tm/Downloads/Data Input/01 Master Data/VAT Country Rules/VAT Country Rules.xlsx'
VAT_country_rules_data(filepath).head(2)

Unnamed: 0,DATE_TIME_FROM,DATE_TIME_TO,RECEPTION_COUNTRY,VAT_COUNTRY_RULE
0,2018-06-01 00:00:01,2019-12-31 23:59:59,CZ,STORAGE_COUNTRY
1,2018-06-01 00:00:01,2019-12-31 23:59:59,DE,STORAGE_COUNTRY


In [14]:
#VAT_rates_data
def VAT_rates_data(filepath):
    """Load the VAT rates master data from an Excel workbook.

    Parameters
    ----------
    filepath : str or path-like
        Location of the workbook.

    Returns
    -------
    pandas.DataFrame
        Contents of the sheet pandas reads by default (the first one).
    """
    # The source is an Excel workbook, not a delimited text file. `sep` is not
    # a read_excel parameter and is rejected by current pandas versions.
    return pd.read_excel(filepath)

In [15]:
#TEST VAT_rates_data Function
# Smoke test: load the workbook and display its first two rows.
# NOTE(review): rebinds the notebook-global `filepath`; absolute path on the author's machine.
filepath = '/Users/tm/Downloads/Data Input/01 Master Data/VAT Rates/VAT Rates.xlsx'
VAT_rates_data(filepath).head(2)

Unnamed: 0,COUNTRY,VAT_RATE_VALUE,VAT_RATE_STRING
0,CZ,0.21,21%
1,DE,0.19,19%


In [16]:
#CUSTOMER RETURNS DATA PREPARATION FUNCTION
def customer_returns_data_prep(filepath):
    """Load a tab-separated customer-returns report and return a prepared DataFrame.

    The 13 report columns are renamed *positionally*, the return timestamp is
    parsed into RETURN_DATE_DATETIME, a date-only RETURN_DATE column is
    derived from it, and ASIN, FNSKU and PRODUCT_NAME are removed. Missing
    values are left as they are.

    Parameters
    ----------
    filepath : str or path-like
        Location of the tab-separated report.

    Returns
    -------
    pandas.DataFrame
        11 columns ending with RETURN_DATE_DATETIME and RETURN_DATE.

    Raises
    ------
    ValueError
        If the report does not have exactly 13 columns.
    """
    df = pd.read_csv(filepath, sep='\t')

    # Names are applied by position, so the original header text is irrelevant
    # (the former '-' -> '_' normalisation pass was overwritten right away).
    df.columns = [
        'RETURN_DATE',
        'AMAZON_ORDER_ID',
        'SKU',
        'ASIN',
        'FNSKU',
        'PRODUCT_NAME',
        'RETURN_QUANTITY_RETURNED',
        'RETURN_FULFILLMENT_CENTER_ID',
        'RETURN_CONDITION',
        'RETURN_REASON',
        'RETURN_STATUS',
        'RETURN_LICENSE_PLATE_NUMBER',
        'RETURN_CUSTOMER_COMMENTS',
    ]

    # Converting the return timestamp into datetime objects
    df['RETURN_DATE_DATETIME'] = pd.to_datetime(df['RETURN_DATE'])

    # `columns=` instead of a positional axis argument: pandas 2.0 made every
    # argument of DataFrame.drop except `labels` keyword-only.
    df = df.drop(columns=['RETURN_DATE', 'ASIN', 'FNSKU', 'PRODUCT_NAME'])

    # Re-created after the drop so the date-only column ends up last.
    df['RETURN_DATE'] = df['RETURN_DATE_DATETIME'].dt.date

    return df

In [17]:
#TEST customer_returns_data_prep Function
# Smoke test: prepare one customer-returns report and display its first two rows.
# NOTE(review): rebinds the notebook-global `filepath`; absolute path on the author's machine.
filepath = '/Users/tm/Downloads/Data Input/02 Monthly Data/Customer Returns/Customer Returns_M-2018-07.txt'
customer_returns_data_prep(filepath).head(2)

Unnamed: 0,AMAZON_ORDER_ID,SKU,RETURN_QUANTITY_RETURNED,RETURN_FULFILLMENT_CENTER_ID,RETURN_CONDITION,RETURN_REASON,RETURN_STATUS,RETURN_LICENSE_PLATE_NUMBER,RETURN_CUSTOMER_COMMENTS,RETURN_DATE_DATETIME,RETURN_DATE
0,408-9101898-8021108,WE-01-02-01-01-01-04-45,1,MAD4,SELLABLE,APPAREL_STYLE,Unit returned to inventory,LPNHE240804120,,2018-07-31 21:52:31+00:00,2018-07-31
1,302-3209019-8069169,WE-01-02-01-01-01-02-41,1,KTW1,SELLABLE,APPAREL_TOO_SMALL,Unit returned to inventory,LPNHE249908274,,2018-07-31 17:46:51+00:00,2018-07-31


In [18]:
#REIMBURSEMENTS DATA PREPARATION FUNCTION
def reimbursements_data_prep(filepath):
    """Load a tab-separated reimbursements report and return a prepared DataFrame.

    The 18 report columns are renamed *positionally*, the approval timestamp
    is parsed into REIMBURSEMENT_APPROVAL_DATETIME and a date-only
    REIMBURSEMENT_APPROVAL_DATE column is derived from it. Missing values are
    left as they are.

    Parameters
    ----------
    filepath : str or path-like
        Location of the tab-separated report.

    Returns
    -------
    pandas.DataFrame
        19 columns ending with REIMBURSEMENT_APPROVAL_DATETIME and
        REIMBURSEMENT_APPROVAL_DATE.

    Raises
    ------
    ValueError
        If the report does not have exactly 18 columns.
    """
    df = pd.read_csv(filepath, sep='\t')

    # Names are applied by position, so the original header text is irrelevant
    # (the former '-' -> '_' normalisation pass was overwritten right away).
    df.columns = [
        'REIMBURSEMENT_APPROVAL_DATE',
        'REIMBURSEMENT_ID',
        'REIMBURSEMENT_CASE_ID',
        'AMAZON_ORDER_ID',
        'REIMBURSEMENT_REASON',
        'SKU',
        'FNSKU',
        'ASIN',
        'PRODUCT_NAME',
        'REIMBURSEMENT_CONDITION',
        'REIMBURSEMENT_CURRENCY_UNIT',
        'REIMBURSEMENT_AMOUNT_PER_UNIT',
        'REIMBURSEMENT_AMOUNT_TOTAL',
        'REIMBURSEMENT_QUANTITY_REIMBURSED_CASH',
        'REIMBURSEMENT_QUANTITY_REIMBURSED_INVENTORY',
        'REIMBURSEMENT_QUANTITY_REIMBURSED_TOTAL',
        'REIMBURSEMENT_ORIGINAL_REIMBURSEMENT_ID',
        'REIMBURSEMENT_ORIGINAL_REIMBURSEMENT_TYPE',
    ]

    # Converting the approval timestamp into datetime objects
    df['REIMBURSEMENT_APPROVAL_DATETIME'] = pd.to_datetime(df['REIMBURSEMENT_APPROVAL_DATE'])

    # `columns=` instead of a positional axis argument: pandas 2.0 made every
    # argument of DataFrame.drop except `labels` keyword-only.
    df = df.drop(columns='REIMBURSEMENT_APPROVAL_DATE')

    # Re-created after the drop so the date-only column ends up last.
    df['REIMBURSEMENT_APPROVAL_DATE'] = df['REIMBURSEMENT_APPROVAL_DATETIME'].dt.date

    return df

In [19]:
#TEST reimbursements_data_prep Function
# Smoke test: prepare one reimbursements report and display its first two rows.
# NOTE(review): rebinds the notebook-global `filepath`; absolute path on the author's machine.
filepath = '/Users/tm/Downloads/Data Input/02 Monthly Data/Reimbursements/Reimbursements_M-2019-05.txt'
reimbursements_data_prep(filepath).head(2)

Unnamed: 0,REIMBURSEMENT_ID,REIMBURSEMENT_CASE_ID,AMAZON_ORDER_ID,REIMBURSEMENT_REASON,SKU,FNSKU,ASIN,PRODUCT_NAME,REIMBURSEMENT_CONDITION,REIMBURSEMENT_CURRENCY_UNIT,REIMBURSEMENT_AMOUNT_PER_UNIT,REIMBURSEMENT_AMOUNT_TOTAL,REIMBURSEMENT_QUANTITY_REIMBURSED_CASH,REIMBURSEMENT_QUANTITY_REIMBURSED_INVENTORY,REIMBURSEMENT_QUANTITY_REIMBURSED_TOTAL,REIMBURSEMENT_ORIGINAL_REIMBURSEMENT_ID,REIMBURSEMENT_ORIGINAL_REIMBURSEMENT_TYPE,REIMBURSEMENT_APPROVAL_DATETIME,REIMBURSEMENT_APPROVAL_DATE
0,1894024892,5225049222,303-3130291-4257167,FeeCorrection,,,,,,EUR,7.4,7.4,,,,,,2019-05-16 18:55:05+00:00,2019-05-16


In [258]:
# Retrieve daily exchange rate and add to csv file
def daily_ecb_exchange_rate(filepath):
    """Fetch the ECB daily reference rates and prepend them to a CSV history file.

    The current rates are downloaded from the ECB daily XML feed, turned into
    a single row stamped with today's local date ('%d.%m.%y'), placed above
    the rows already in `filepath`, and the file is rewritten in place with
    the 'Date' column first.

    Parameters
    ----------
    filepath : str or path-like
        Comma-separated history file; it is read and then overwritten.

    Returns
    -------
    None

    Raises
    ------
    requests.HTTPError
        If the ECB endpoint answers with an error status.
    requests.RequestException
        On connection problems or when the request times out.

    Notes
    -----
    NOTE(review): nothing prevents duplicates, so calling this twice on the
    same day prepends two rows with the same 'Date'. Rates are stored as the
    strings found in the feed.
    """
    # Imported locally, as before. `datetime` was previously used without any
    # import, which raises NameError on a fresh kernel.
    import datetime
    from xml.etree import ElementTree as ET

    import requests

    df = pd.read_csv(filepath, sep=',')

    # get exchange rate data
    response = requests.get(
        'http://www.ecb.europa.eu/stats/eurofxref/eurofxref-daily.xml',
        timeout=30,  # do not hang indefinitely if the endpoint is unreachable
    )
    response.raise_for_status()

    # Parse the fully downloaded body. `response.content` has any transfer
    # encoding already undone, and no streamed connection is left open.
    root = ET.fromstring(response.content)
    namespaces = {'ex': 'http://www.ecb.int/vocabulary/2002-08-01/eurofxref'}
    daily_rate_dict = {
        str(cube.attrib['currency']): cube.attrib['rate']
        for cube in root.findall('.//ex:Cube[@currency]', namespaces=namespaces)
    }
    daily_rate_dict['Date'] = datetime.date.today().strftime("%d.%m.%y")

    # dict is used for new df row
    new_row = pd.DataFrame(data=daily_rate_dict, index=[0])

    # put the new row on top of the existing history
    df = pd.concat([new_row, df], sort=False).reset_index(drop=True)

    # move 'Date' column to front
    cols = list(df)
    cols.insert(0, cols.pop(cols.index('Date')))
    df = df.loc[:, cols]

    # write new df into csv_file
    df.to_csv(filepath, index=False)


In [259]:
# Retrieve daily exchange rate and add to csv file
# Runs the update against the history file, then reloads it to show the two newest rows.
# NOTE(review): every execution of this cell rewrites the file on disk; it also rebinds
# the notebook-globals `filepath` and `df`. Absolute path on the author's machine.
filepath = '/Users/tm/Downloads/01 Master Data/Exchange_Rates/eurofxref-hist.csv'
daily_ecb_exchange_rate(filepath)
df = pd.read_csv(filepath, sep=',')
df.head(2)

Unnamed: 0,Date,USD,JPY,BGN,CZK,DKK,GBP,HUF,PLN,RON,...,ZAR,CYP,EEK,LTL,LVL,MTL,ROL,SIT,SKK,TRL
0,01.02.20,1.1052,120.35,1.9558,25.21,7.4731,0.84175,337.05,4.3009,4.7789,...,16.49,,,,,,,,,
1,01.02.20,1.1052,120.35,1.9558,25.21,7.4731,0.84175,337.05,4.3009,4.7789,...,16.49,,,,,,,,,


In [32]:
#COGS data
def COGS_data(filepath):
    """Load the COGS master data from an Excel workbook.

    Parameters
    ----------
    filepath : str or path-like
        Location of the workbook.

    Returns
    -------
    pandas.DataFrame
        Contents of the sheet pandas reads by default (the first one).
    """
    # The source is an Excel workbook, not a delimited text file. `sep` is not
    # a read_excel parameter and is rejected by current pandas versions.
    return pd.read_excel(filepath)

In [33]:
#TEST COGS_data
# Smoke test: load the workbook and display its first two rows.
# NOTE(review): rebinds the notebook-global `filepath`; absolute path on the author's machine.
filepath = '/Users/tm/Downloads/Data Input/01 Master Data/COGS/COGS.xlsx'
COGS_data(filepath).head(2)

Unnamed: 0,DATE_TIME_FROM,DATE_TIME_TO,BRAND,SKU,ITEM_NAME,CURRENCY,COGS
0,2018-06-01 00:00:01,2019-03-31 23:59:59,weltenmann,WE-01-02-01-01-01-01-41,WELTENMANN Classic Men Cotton Slip-on Espadril...,EUR,8.0
1,2018-06-01 00:00:01,2019-03-31 23:59:59,weltenmann,WE-01-02-01-01-01-01-42,WELTENMANN Classic Men Cotton Slip-on Espadril...,EUR,8.0


In [34]:
#item_information_data
def item_information_data(filepath):
    """Load the item information master data from an Excel workbook.

    Parameters
    ----------
    filepath : str or path-like
        Location of the workbook.

    Returns
    -------
    pandas.DataFrame
        Contents of the sheet pandas reads by default (the first one).
    """
    # The source is an Excel workbook, not a delimited text file. `sep` is not
    # a read_excel parameter and is rejected by current pandas versions.
    return pd.read_excel(filepath)

In [35]:
#Test item_information_data
# Smoke test: load the workbook and display its first two rows.
# NOTE(review): rebinds the notebook-global `filepath`; absolute path on the author's machine.
filepath = '/Users/tm/Downloads/Data Input/01 Master Data/Item Information/Item Information.xlsx'
item_information_data(filepath).head(2)

Unnamed: 0,DATE_TIME_FROM,DATE_TIME_TO,BRAND,SKU,ITEM_NAME,EAN,ASIN,FNSKU,ITEM_LENGTH_MM,ITEM_WIDTH_MM,ITEM_HEIGHT_MM,ITEM_VOLUME_M3,ITEM_VOLUME_F3,ITEM_WEIGHT_KG,ITEM_WEIGHT_KG_NOMINAL,STORAGE_ITEM_SIZE,STORAGE_ITEM_MEDIA_TYPE,STORAGE_ITEM_CATEGORY,ITEM_SHIPMENT_TYPE
0,2018-06-01 00:00:01,2019-03-31 23:59:59,weltenmann,WE-01-02-01-01-01-01-41,WELTENMANN Classic Men Cotton Slip-on Espadril...,8436579700737,B07DFKN4TM,X000UUSGXL,315,110,70,0.002426,0.085653,0.57,1.0,Standard,Non-Media,Shoes,Standard Parcel
1,2018-06-01 00:00:01,2019-03-31 23:59:59,weltenmann,WE-01-02-01-01-01-01-42,WELTENMANN Classic Men Cotton Slip-on Espadril...,8436579700744,B07DFKW5FS,X000UUSC0N,315,110,70,0.002426,0.085653,0.62,1.0,Standard,Non-Media,Shoes,Standard Parcel


In [38]:
#fulfillment_type_data
def fulfillment_type_data(filepath):
    """Load the fulfillment type master data from an Excel workbook.

    Parameters
    ----------
    filepath : str or path-like
        Location of the workbook.

    Returns
    -------
    pandas.DataFrame
        Contents of the sheet pandas reads by default (the first one).
    """
    # The source is an Excel workbook, not a delimited text file. `sep` is not
    # a read_excel parameter and is rejected by current pandas versions.
    return pd.read_excel(filepath)

In [39]:
#Test fulfillment_type_data
# Smoke test: load the workbook and display its first two rows.
# The cell previously called item_information_data, so the function named in
# the heading was never exercised.
# NOTE(review): rebinds the notebook-global `filepath`; absolute path on the author's machine.
filepath = '/Users/tm/Downloads/Data Input/01 Master Data/Fulfillment Type/Fulfillment Type.xlsx'
fulfillment_type_data(filepath).head(2)

Unnamed: 0,DATE_TIME_FROM,DATE_TIME_TO,FULFILLMENT_CHANNEL,FULFILLMENT_TYPE
0,2018-06-01 00:00:01,2018-08-01 14:59:59,Amazon,PAN EU
1,2018-08-01 15:00:01,2019-05-31 15:59:59,Amazon,EFN


In [43]:
#shipping_fees_data
def shipping_fees_data(filepath):
    """Load the shipping fees master data from an Excel workbook.

    Parameters
    ----------
    filepath : str or path-like
        Location of the workbook.

    Returns
    -------
    pandas.DataFrame
        Contents of the sheet pandas reads by default (the first one).
    """
    # The source is an Excel workbook, not a delimited text file. `sep` is not
    # a read_excel parameter and is rejected by current pandas versions.
    return pd.read_excel(filepath)

In [44]:
#Test shipping_fees_data
# Smoke test: load the workbook and display its first two rows.
# NOTE(review): rebinds the notebook-global `filepath`; absolute path on the author's machine.
filepath = '/Users/tm/Downloads/Data Input/01 Master Data/Shipping Fees/Shipping Fees.xlsx'
shipping_fees_data(filepath).head(2)

Unnamed: 0,DATE_TIME_FROM,DATE_TIME_TO,ITEM_SHIPMENT_TYPE,FULFILLMENT_TYPE,ITEM_WEIGHT_KG,ITEM_WEIGHT_KG_MIN,ITEM_WEIGHT_KG_MAX,SALES_CHANNEL,CURRENCY,SHIPPING_FEE
0,2018-06-01 00:00:01,2019-03-31 23:59:59,Standard Envelope,PAN EU,≤ 0.1,0.0,0.1,Amazon.co.uk,GBP,1.47
1,2018-06-01 00:00:01,2019-03-31 23:59:59,Standard Envelope,PAN EU,≤ 0.1,0.0,0.1,Amazon.de,EUR,1.81


In [126]:
#settlement_data
def settlement_data(filepath, sheet_name):
    """Load one sheet of the settlement workbook and discard fully empty rows.

    Parameters
    ----------
    filepath : str or path-like
        Location of the Excel workbook.
    sheet_name : str or int
        Sheet to read; passed through to `pandas.read_excel`.

    Returns
    -------
    pandas.DataFrame
        The sheet without the rows in which every value is missing. The
        original row labels are kept (the index is not reset).
    """
    # The source is an Excel workbook, not a delimited text file. `sep` is not
    # a read_excel parameter and is rejected by current pandas versions.
    df = pd.read_excel(filepath, sheet_name=sheet_name)

    # drops all rows where all values are missing
    return df.dropna(axis=0, how='all')

In [128]:
#Test settlement_data
# Smoke test: load one named sheet of the workbook and display its first two rows.
# NOTE(review): rebinds the notebook-globals `filepath` and `sheet_name`; absolute path on the author's machine.
filepath = '/Users/tm/Downloads/Data Input/01 Master Data/Settlement IDs Sales Channel Information/Settlement ID Sales Channel Information.xlsx'
sheet_name = 'SETTLEMENT_ID_COMPLETE'
settlement_data(filepath, sheet_name).head(2)

Unnamed: 0,SETTLEMENT_ID,START_DATE,SALES_CHANNEL
0,7276624000.0,2018-04-15,Amazon.es
1,10622870000.0,2018-09-16,Amazon.es


In [269]:
# Scratch cell: reads a reimbursements report unmodified and shows its original header names.
# NOTE(review): rebinds the notebook-globals `filepath` and `df`; absolute path on the author's machine.
filepath = '/Users/tm/Downloads/Data Input/02 Monthly Data/Reimbursements/Reimbursements_M-2019-05.txt'
df = pd.read_csv(filepath, sep='\t')
df.columns.values

array(['approval-date', 'reimbursement-id', 'case-id', 'amazon-order-id',
       'reason', 'sku', 'fnsku', 'asin', 'product-name', 'condition',
       'currency-unit', 'amount-per-unit', 'amount-total',
       'quantity-reimbursed-cash', 'quantity-reimbursed-inventory',
       'quantity-reimbursed-total', 'original-reimbursement-id',
       'original-reimbursement-type'], dtype=object)

In [None]:
#Converts df into dictionary
def create_dictionary(filepath, index):
    """Return one row of a tab-separated file as a ``{column: value}`` dict.

    Parameters
    ----------
    filepath : str or path-like
        Location of the tab-separated file.
    index : int
        Zero-based position of the row; negative values count from the end.

    Returns
    -------
    dict
        Column names mapped to the values of the selected row.

    Raises
    ------
    IndexError
        If `index` is outside the range of rows in the file.
    """
    df = pd.read_csv(filepath, sep='\t')
    # Convert only the requested row. The previous code transposed the whole
    # frame and built a dict for every row just to pick one of them.
    return df.iloc[index].to_dict()
# NOTE(review): `file_path` is not assigned in this cell; the call relies on the value
# bound by the customer-shipments test cell earlier in the notebook.
create_dictionary(file_path, 1)