In [42]:
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
Prepare consolidated data

"""

# Import libraries

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pandas as pd
import aerospike
import json

# Echo the module docstring so the run output starts with the script's title.
print(__doc__)

def read(file):
    """Load an Excel workbook into a DataFrame.

    Args:
        file: Path (or other object accepted by ``pandas.read_excel``) of the
            workbook to load.

    Returns:
        The DataFrame produced by ``pandas.read_excel`` with its default
        options.
    """
    return pd.read_excel(file)

def clean_data(data):
    """Strip whitespace from text cells and lower-case the column names.

    Columns whose dtype is ``object`` are treated as text columns: every
    string value in them has leading/trailing whitespace removed.  Values in
    those columns that are not strings (numbers, ``None``, NaN, ...) are kept
    unchanged rather than being replaced by NaN.

    Args:
        data: Input DataFrame. It is not modified.

    Returns:
        A new DataFrame with the non-object columns first, followed by the
        cleaned object columns, and with all string column labels lower-cased.
    """
    try:
        text_types = basestring  # Python 2: matches both str and unicode
    except NameError:
        text_types = str

    def strip_text(value):
        # Only real strings are stripped; anything else passes through as-is.
        return value.strip() if isinstance(value, text_types) else value

    is_text = data.dtypes == object

    # Non-object columns are carried over untouched.
    numeric_data = data.loc[:, ~is_text]

    # Element-wise strip instead of ``.str.strip()``: the ``.str`` accessor
    # turns non-string cells into NaN and rejects columns without strings.
    trim_data = data.loc[:, is_text].apply(lambda col: col.map(strip_text))

    # Re-assemble: non-object columns first, then the cleaned text columns.
    data = pd.concat([numeric_data, trim_data], axis=1)

    # Lower-case string labels; leave non-string labels (e.g. ints) alone.
    data.columns = [
        label.lower() if isinstance(label, text_types) else label
        for label in data.columns
    ]

    return data

def get_data(path="/home/azureuser/Aerospike/data/customer/customer_address.xls"):
    """Read the customer address workbook and return it cleaned.

    Args:
        path: Location of the Excel workbook to load. Defaults to the
            customer address file this script was written against.

    Returns:
        The DataFrame returned by ``clean_data`` for the workbook read by
        ``read``.
    """
    return clean_data(read(path))

def _address_records(data):
    """Build one ``{'cust_id': ..., 'address': [...]}`` dict per customer."""
    address_columns = ['cust_id', 'address_type', 'strt_address', 'city',
                       'state', 'country', 'pincode']

    records = []
    # Iterating the groupby yields (key, sub-frame) pairs in sorted key order,
    # so the key and its rows can never get out of step with each other.
    for cust_id, rows in data.groupby('cust_id'):
        # Unwrap numpy scalars so the record key is a plain Python value.
        if hasattr(cust_id, 'item'):
            cust_id = cust_id.item()
        records.append({
            'cust_id': cust_id,
            'address': rows[address_columns].to_dict('records'),
        })
    return records


def load_address(data, namespace='test', set_name='s1ad'):
    """Write one Aerospike record per customer containing all its addresses.

    Rows of ``data`` are grouped by ``cust_id``; each group becomes a record
    with the bins ``cust_id`` and ``address`` (a list of per-row dicts).  Each
    record is written with ``client.put`` and then read back with
    ``client.get`` and printed for verification.  A failure on one record is
    reported on stderr and does not stop the remaining records.

    Args:
        data: DataFrame with the columns ``cust_id``, ``address_type``,
            ``strt_address``, ``city``, ``state``, ``country`` and
            ``pincode``.
        namespace: Aerospike namespace to write to.
        set_name: Aerospike set to write to.

    Exits the process with status 1 if the cluster connection cannot be
    established.
    """
    import sys

    record_json = _address_records(data)
    print (type(record_json))
    print (record_json)

    config = {
        'hosts': [ ('127.0.0.1', 3000) ]
    }

    # Create a single client for the whole load and connect it to the cluster
    try:
        client = aerospike.client(config).connect()
    except Exception as e:
        print("failed to connect to the cluster with", config['hosts'])
        print("error: {0}".format(e), file=sys.stderr)
        sys.exit(1)

    try:
        for bins in record_json:
            # Records are addressable via a tuple of (namespace, set, key)
            record_key = (namespace, set_name, bins['cust_id'])

            print (record_key)
            print (bins)

            try:
                # Write the record, then read it back for verification
                client.put(record_key, bins)
                (stored_key, _metadata, stored) = client.get(record_key)
            except Exception as e:
                print("error: {0}".format(e), file=sys.stderr)
                continue

            print (stored_key, stored)
    finally:
        # Close the connection even if something unexpected is raised
        client.close()
    
def remove_address(data, namespace='dev', set_name='s1bd'):
    """Remove the Aerospike record of every customer found in ``data``.

    Each distinct ``cust_id`` is removed once with ``client.remove``.  A
    failure on one record is reported on stderr and does not stop the
    remaining removals.

    NOTE(review): ``load_address`` in this file writes to namespace 'test',
    set 's1ad', while the defaults here are 'dev' / 's1bd' -- confirm which
    set this function is meant to clean up.

    Args:
        data: DataFrame with a ``cust_id`` column.
        namespace: Aerospike namespace to remove from.
        set_name: Aerospike set to remove from.

    Exits the process with status 1 if the cluster connection cannot be
    established.
    """
    import sys

    # Several address rows can share a customer; remove each key only once.
    keys = data['cust_id'].drop_duplicates()

    config = {
        'hosts': [ ('127.0.0.1', 3000) ]
    }

    # Create a single client for the whole run and connect it to the cluster
    try:
        client = aerospike.client(config).connect()
    except Exception as e:
        print("failed to connect to the cluster with", config['hosts'])
        print("error: {0}".format(e), file=sys.stderr)
        sys.exit(1)

    try:
        for pk in keys:
            # Unwrap numpy scalars so the record key is a plain Python value.
            if hasattr(pk, 'item'):
                pk = pk.item()

            # Records are addressable via a tuple of (namespace, set, key)
            record_key = (namespace, set_name, pk)

            try:
                client.remove(record_key)
            except Exception as e:
                print("error: {0}".format(e), file=sys.stderr)
    finally:
        # Close the connection even if something unexpected is raised
        client.close()

# Script entry point: read and clean the customer address workbook, then
# write the per-customer address records to Aerospike.
if __name__ == '__main__':
    data = get_data()
    load_address(data)


Prepare consolidated data


<type 'list'>
[{'cust_id': 44261559, 'address': [{u'city': u'Hollywood', u'country': u'USA', u'pincode': 51520, u'state': u'California', u'cust_id': 44261559, u'strt_address': u'8411 1/2 Sunset Boulevard', u'address_type': u'Office'}]}, {'cust_id': 84711641, 'address': [{u'city': nan, u'country': u'USA', u'pincode': 50001, u'state': u'Bluffington', u'cust_id': 84711641, u'strt_address': u'21 Jumbo Street', u'address_type': u'Office'}]}, {'cust_id': 208312345, 'address': [{u'city': u'Chicago', u'country': u'USA', u'pincode': 50022, u'state': u'Illinois', u'cust_id': 208312345, u'strt_address': u'711 Calhoun Street', u'address_type': u'Office'}]}, {'cust_id': 217237812, 'address': [{u'city': nan, u'country': u'USA', u'pincode': 50518, u'state': nan, u'cust_id': 217237812, u'strt_address': u'79 Wistful Vista', u'address_type': u'Home'}]}, {'cust_id': 242339097, 'address': [{u'city': u'Santa Monica', u'country': u'USA', u'pincode': 50420, u'state': u'California