In [1]:
import json
import math
import os
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
import requests

# CouchDB connection settings. Each value can be overridden through an
# environment variable so that real credentials do not have to be written into
# the source; the fallbacks are the original local-development values.
username = os.environ.get("COUCHDB_USERNAME", "admin")
password = os.environ.get("COUCHDB_PASSWORD", "admin")
# Request paths are built by plain concatenation (couchdb_url + "customers/..."),
# so normalise the base URL to end with exactly one "/".
couchdb_url = os.environ.get("COUCHDB_URL", "http://127.0.0.1:5984/").rstrip("/") + "/"

def handle_nan(value, default='Unknown'):
    """Return *default* when *value* is a missing scalar, otherwise *value*.

    "Missing" is whatever ``pd.isna`` reports for a scalar: ``None`` and
    float NaN among others.

    Args:
        value: The value to inspect. Non-scalar values (lists, dicts, arrays,
            ...) are returned unchanged.
        default: Replacement returned for a missing value.

    Returns:
        ``default`` if ``value`` is a missing scalar, else ``value`` itself.
    """
    # pd.isna() is element-wise for list-likes and returns an array, whose
    # truth value is ambiguous inside an ``if``; only test scalars.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return default
    return value

def _post_customer_docs(docs):
    """POST *docs* to the ``customers`` database via ``_bulk_docs``.

    Returns:
        ``None`` when the request succeeded and no document was reported as
        rejected, otherwise a string describing the failure.
    """
    response = requests.post(
        couchdb_url + "customers/_bulk_docs",
        json={"docs": docs},
        auth=(username, password),
    )
    if response.status_code != 201:
        return response.text

    # A 201 only means the request was processed: the body is a list with one
    # result per document, and rejected documents carry an "error" key.
    try:
        results = response.json()
    except ValueError:
        # Body is not JSON; nothing further to inspect.
        return None
    if not isinstance(results, list):
        return None

    rejected = [item for item in results if isinstance(item, dict) and "error" in item]
    if rejected:
        return f"{len(rejected)} of {len(docs)} documents were rejected, e.g. {rejected[0]}"
    return None


def upload_customer_to_couchdb(customers, batch_size=50000):
    """Upload customer documents to CouchDB in batches.

    Documents are accumulated until ``batch_size`` of them are pending, then
    sent in a single ``_bulk_docs`` request; whatever is left over at the end
    is sent as a final, smaller batch. The outcome of every request is
    printed. HTTP-level failures are reported and the upload continues with
    the next batch.

    Args:
        customers: Iterable of JSON-serialisable documents.
        batch_size: Number of documents per request. If it is never reached
            (including values below 1), everything is sent in one final
            request.
    """
    pending = []

    for customer in customers:
        pending.append(customer)

        if len(pending) == batch_size:
            error = _post_customer_docs(pending)
            if error is None:
                print(f"Successfully added batch of {len(pending)} customers to CouchDB.")
            else:
                print(f"Failed to add batch to CouchDB: {error}")
            pending = []

    # Send whatever did not fill a complete batch.
    if pending:
        error = _post_customer_docs(pending)
        if error is None:
            print(f"Successfully added remaining {len(pending)} customers to CouchDB.")
        else:
            print(f"Failed to add remaining customers to CouchDB: {error}")

def _build_customer_documents(customers_data, transactions_data, countries_data):
    """Build one upload-ready document per row of *customers_data*.

    Each document holds the customer's own fields, a nested ``country`` object
    (``id``, ``name``, ``currency``) and ``transactions``, the list of ids of
    the transactions whose ``customer_id`` matches the customer. Missing
    values are stored as ``None`` (JSON ``null``).
    """
    # Country rows keyed by their 'id' for constant-time lookup.
    countries_by_id = countries_data.set_index('id').to_dict(orient='index')

    # Transaction ids per customer, computed in a single pass over the groups
    # instead of one get_group() call per customer.
    transactions_by_customer = {
        key: ids.tolist()
        for key, ids in transactions_data.groupby('customer_id')['id']
    }

    documents = []
    for customer in customers_data.to_dict(orient='records'):
        customer_id = int(customer['id'])

        country_id = handle_nan(customer.get('country_id'), None)
        # A gap anywhere in the column makes pandas parse it as float, so
        # integer ids arrive as 1.0; restore them to int.
        if isinstance(country_id, float) and country_id.is_integer():
            country_id = int(country_id)
        # Empty dict when the customer has no country or the id is unknown.
        country_info = countries_by_id.get(country_id, {})

        document = {'id': customer_id}
        # NOTE(review): 'password' is uploaded exactly as read from the CSV.
        for field in ('name', 'email', 'username', 'password', 'address'):
            # NaN is not valid JSON, so missing cells are sent as null.
            document[field] = handle_nan(customer[field], None)
        document['country'] = {
            'id': country_id,
            'name': handle_nan(country_info.get('name'), None),
            'currency': handle_nan(country_info.get('currency'), None),
        }
        document['transactions'] = transactions_by_customer.get(customer_id, [])

        documents.append(document)

    return documents


def upload_customers_with_transactions_and_country(customer_file, transaction_file, country_file):
    """Read the three CSV files, build customer documents and upload them.

    Args:
        customer_file: CSV with columns ``id``, ``name``, ``email``,
            ``username``, ``password``, ``address`` and, optionally,
            ``country_id``.
        transaction_file: CSV with at least ``id`` and ``customer_id``.
        country_file: CSV with at least ``id``; ``name`` and ``currency`` are
            copied into each customer's ``country`` object when present.

    Raises:
        KeyError: If a required customer column is missing.
    """
    customers_data = pd.read_csv(customer_file)
    transactions_data = pd.read_csv(transaction_file)
    countries_data = pd.read_csv(country_file)

    customers_json = _build_customer_documents(customers_data, transactions_data, countries_data)

    upload_customer_to_couchdb(customers_json)

# Build the customer documents (each with its country details and the ids of
# its transactions) from the three CSV files and upload them to CouchDB.
upload_customers_with_transactions_and_country("customers.csv", "transactions.csv", "countries.csv")

Successfully added batch of 50000 customers to CouchDB.
Successfully added batch of 50000 customers to CouchDB.
Successfully added batch of 50000 customers to CouchDB.
Successfully added batch of 50000 customers to CouchDB.
Successfully added batch of 50000 customers to CouchDB.
Successfully added batch of 50000 customers to CouchDB.
Successfully added batch of 50000 customers to CouchDB.
Successfully added batch of 50000 customers to CouchDB.
Successfully added remaining 45390 customers to CouchDB.
