In [1]:
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
Prepare consolidated data

"""

# Import libraries

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pandas as pd
import aerospike
import json

print(__doc__)

class Consolidate:
    
    def __init__(self):
        pass
        
    def read(self, file):
        data = pd.read_excel(file)
        return data

    def clean_data(self, data):
    
        # Select numeric data alone from data object
        numeric_data = data.loc[:, data.dtypes != object]
    
        # Select string data alone from data object to trim
        trim_data = data.select_dtypes(['object'])
    
        # Trim all string objects
        trim_data = trim_data.apply(lambda x: x.str.strip())
    
        # Merge numeric and string data in to data object
        data = pd.concat([numeric_data, trim_data], axis=1)
    
        # Change column name to lower case
        data.columns = [x.lower() for x in data.columns]
        return data
    
    def load_data(self, data):
            
        # Split dataframe into key and records
        key = data['cust_id']
    
        record_json = data.to_dict(orient='records')
    
        config = {
            'hosts': [ ('127.0.0.1', 3000) ]
        }

        # Create a client and connect it to the cluster
        try:
            client = aerospike.client(config).connect()
        except:
            import sys
            print("failed to connect to the cluster with", config['hosts'])
            sys.exit(1)
    
        # Records are addressable via a tuple of (namespace, set, key)
        for pk, bin in zip(key, record_json):
            string = "('dev', 's1bd', "
            string +=`pk`
            string = string + ")"
        
            from ast import literal_eval as make_tuple
            string = make_tuple(string)
        
            try:
                client = aerospike.client(config).connect()
                # Write a record
                client.put(string, bin)
            except Exception as e:
                import sys
                print("error: {0}".format(e), file=sys.stderr)

            # Read a record after insert for verification
            (key, metadata, record) = client.get(string)
            print (key, record)
        
            # Close the connection to the Aerospike cluster
            client.close()
    
    def remove_data(self, data):

        key = data['cust_id']
    
        config = {
          'hosts': [ ('127.0.0.1', 3000) ]
        }

        # Create a client and connect it to the cluster
        try:
            client = aerospike.client(config).connect()
        except:
            import sys
            print("failed to connect to the cluster with", config['hosts'])
            sys.exit(1)
    
        # Records are addressable via a tuple of (namespace, set, key)
        for pk in key:
            string = "('dev', 's1bd', "
            string +=`pk`
            string = string + ")"
        
            from ast import literal_eval as make_tuple
            string = make_tuple(string)
        
            try:
                client = aerospike.client(config).connect()
                # Remove a record
                client.remove(string)
            except Exception as e:
                import sys
                print("error: {0}".format(e), file=sys.stderr)

            # Close the connection to the Aerospike cluster
            client.close()
    
def drive(mode):
    
    cons = Consolidate()

    # Read customer details excel
    data1 = cons.read("/home/azureuser/Aerospike/data/customer/customer details.xls")
    
    # Clean data1
    data1 = cons.clean_data(data1)
    #print (data1.columns)
    
    # Read Customer vehicle excel
    data2 = cons.read("/home/azureuser/Aerospike/data/customer/Customer_vehicle.xls")
    
    # Clean data2
    data2 = cons.clean_data(data2)
    #print (data2.columns)
    
    # Merge data1 and data2 by cust_id
    data2 = pd.merge(data1, data2, on='cust_id', how='inner')
    
    # Read Motor vehicle report excel
    data3 = cons.read("/home/azureuser/Aerospike/data/customer/Motor_vehicle_report.xls")
    
    # Clean data3
    data3 = cons.clean_data(data3)
    
    # Merge data2 and data3 by vehicle_no
    data3 = pd.merge(data2, data3, on='vehicle_no', how='inner')
    #print (data3.columns)
    
    # Read Customer address excel
    data4 = cons.read("/home/azureuser/Aerospike/data/customer/customer_address.xls")
    
    # Clean data4
    data4 = cons.clean_data(data4)
    
    # Merge data3 and data4 by cust_id
    data4 = pd.merge(data3, data4, on='cust_id', how='inner')
    #print (data4.columns)
    print (data4.count)
    
    if mode == 'load':
        # Load data in to aerospike
        cons.load_data(data4)
    else:
        # Remove data from aerospike
        cons.remove_data(data4)
    
if __name__ == '__main__':
    drive('delete')


Prepare consolidated data


<bound method DataFrame.count of        cust_id        dob  no_of_child child1_d_o_b  credit_rating  \
0   2396305636 1982-10-21            1   1950-02-01            571   
1     84711641 1988-02-07            1   1950-01-10            502   
2   1561684799 1994-02-22            1   1950-02-26            546   
3   9006719754 1987-05-07            1   1950-01-28            770   
4   3947774146 1970-05-20            0          NaT            618   
5   2006085121 1963-12-05            2   1950-01-25            560   
6   1907459679 1954-04-26            1   1950-02-27            557   
7   2398850356 1995-02-11            0          NaT            571   
8   8486613847 1992-10-06            2   2021-10-06            754   
9   4195354888 1953-08-05            2   1950-03-14            625   
10  8833668145 1990-12-16            0          NaT            765   
11  2093269256 1989-06-22            0          NaT            562   
12  4927435058 1988-01-03   

error: (2L, 'AEROSPIKE_ERR_RECORD_NOT_FOUND', 'src/main/client/remove.c', 110)
error: (2L, 'AEROSPIKE_ERR_RECORD_NOT_FOUND', 'src/main/client/remove.c', 110)
error: (2L, 'AEROSPIKE_ERR_RECORD_NOT_FOUND', 'src/main/client/remove.c', 110)
error: (2L, 'AEROSPIKE_ERR_RECORD_NOT_FOUND', 'src/main/client/remove.c', 110)
error: (2L, 'AEROSPIKE_ERR_RECORD_NOT_FOUND', 'src/main/client/remove.c', 110)
error: (2L, 'AEROSPIKE_ERR_RECORD_NOT_FOUND', 'src/main/client/remove.c', 110)
error: (2L, 'AEROSPIKE_ERR_RECORD_NOT_FOUND', 'src/main/client/remove.c', 110)
error: (2L, 'AEROSPIKE_ERR_RECORD_NOT_FOUND', 'src/main/client/remove.c', 110)
error: (2L, 'AEROSPIKE_ERR_RECORD_NOT_FOUND', 'src/main/client/remove.c', 110)
error: (2L, 'AEROSPIKE_ERR_RECORD_NOT_FOUND', 'src/main/client/remove.c', 110)
error: (2L, 'AEROSPIKE_ERR_RECORD_NOT_FOUND', 'src/main/client/remove.c', 110)
error: (2L, 'AEROSPIKE_ERR_RECORD_NOT_FOUND', 'src/main/client/remove.c', 110)
error: (2L, 'AEROSPIKE_ERR_RECORD_NOT_FOUND', 'src/m