# In [None]:
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
Prepare consolidated data

"""

# Import libraries

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import os

import pandas as pd

# Echo the module docstring as a banner. This is a module-level statement,
# so it runs on import as well as on direct execution.
print(__doc__)

def read(file):
    """Load an Excel workbook into a DataFrame.

    ``file`` is handed straight to ``pandas.read_excel`` with default
    options, and whatever that call returns is passed back to the caller.
    """
    return pd.read_excel(file)

def clean_data(data):
    """Trim whitespace from text cells and lower-case the column names.

    The frame is split into its non-object columns and its object-dtype
    columns. Every string cell in the object columns has leading and
    trailing whitespace removed; cells that are not strings (numbers,
    dates, missing values) are passed through unchanged. The two halves
    are then concatenated back together.

    Note that the result lists the non-object columns first, followed by
    the object columns, so the column order may differ from the input.

    Parameters:
        data: pandas DataFrame to clean. It is not modified.

    Returns:
        A new DataFrame with trimmed text cells and lower-cased string
        column labels.
    """
    # str on Python 3; str and unicode on Python 2 (the file carries
    # __future__ imports, so both interpreters are kept working).
    text_types = (str, type(u""))

    def strip_if_text(value):
        # Only real strings are trimmed. Using the vectorised
        # ``Series.str.strip()`` here would replace every non-string cell
        # of a mixed column (e.g. a numeric pincode) with NaN.
        return value.strip() if isinstance(value, text_types) else value

    # A single mask drives both selections so that every column ends up in
    # exactly one of the two halves.
    is_object = data.dtypes == object
    numeric_data = data.loc[:, ~is_object]
    trim_data = data.loc[:, is_object]

    # Trim the string cells column by column
    trim_data = trim_data.apply(lambda column: column.map(strip_if_text))

    # Merge numeric and string data back into one frame
    data = pd.concat([numeric_data, trim_data], axis=1)

    # Lower-case the column names; non-string labels are left untouched
    data.columns = [
        name.lower() if isinstance(name, text_types) else name
        for name in data.columns
    ]

    return data

def get_data(data_dir="/home/azureuser/Aerospike/data/customer"):
    """Build and return the consolidated customer table.

    Four Excel sheets are read from ``data_dir``, each is passed through
    ``clean_data``, and they are joined in this order:

    1. Home and office addresses are placed side by side per ``cust_id``
       (outer join).
    2. Customer details are joined with those addresses on ``cust_id``
       (inner join).
    3. The result is joined with the customer vehicles on ``cust_id``
       (inner join).
    4. The result is joined with the motor vehicle reports on
       ``vehicle_no`` (inner join).

    Parameters:
        data_dir: directory holding ``customer details.xls``,
            ``customer_address.xls``, ``Customer_vehicle.xls`` and
            ``Motor_vehicle_report.xls``. Defaults to the location that
            was previously hard-coded.

    Returns:
        The fully merged pandas DataFrame.
    """

    def load(filename):
        # Read one sheet from data_dir and normalise it.
        return clean_data(read(os.path.join(data_dir, filename)))

    # Customer details
    customers = load("customer details.xls")

    # Customer addresses: one row per (customer, address type)
    addresses = load("customer_address.xls")
    home = _address_block(addresses, 'Home', 'home_')
    office = _address_block(addresses, 'Office', 'office_')

    # Outer join keeps customers that have only one of the two address types
    addresses = pd.merge(home, office, on='cust_id', how='outer')

    # Customer details + addresses
    consolidated = pd.merge(customers, addresses, on='cust_id', how='inner')

    # + customer vehicles
    vehicles = load("Customer_vehicle.xls")
    consolidated = pd.merge(consolidated, vehicles, on='cust_id', how='inner')

    # + motor vehicle reports
    reports = load("Motor_vehicle_report.xls")
    consolidated = pd.merge(consolidated, reports, on='vehicle_no', how='inner')

    return consolidated


def _address_block(addresses, address_type, prefix):
    """Return the rows of one address type with address columns prefixed."""
    address_fields = ('address1', 'address2', 'pincode', 'city', 'state', 'country')

    subset = addresses[addresses["address_type"] == address_type]

    # The type is constant within the subset and is encoded in the prefix,
    # so the column itself is no longer needed.
    subset = subset.drop(['address_type'], axis=1)

    return subset.rename(columns={name: prefix + name for name in address_fields})

# Run the consolidation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    get_data()