In [1]:
import json
import math
import os
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
import requests

# Connection settings. Each value can be overridden through an environment
# variable so real credentials do not have to live in this file; the fallbacks
# are the original local-development defaults.
username = os.environ.get("COUCHDB_USER", "admin")
password = os.environ.get("COUCHDB_PASSWORD", "admin")
# Request URLs are built by plain concatenation (couchdb_url + "transactions/..."),
# so normalise the base URL to exactly one trailing slash.
couchdb_url = os.environ.get("COUCHDB_URL", "http://127.0.0.1:5984/").rstrip("/") + "/"

def handle_nan(value, default='Unknown'):
    """Return *value*, or *default* when *value* is a missing-data marker.

    A value counts as missing when it is ``None``, a float NaN, or anything
    else ``pd.isna`` reports as missing.
    """
    if value is None:
        return default
    if isinstance(value, float) and math.isnan(value):
        return default
    if pd.isna(value):
        return default
    return value

def _post_bulk_docs(session, docs, success_message, failure_prefix):
    """POST one batch of documents to ``transactions/_bulk_docs`` and report the outcome."""
    response = session.post(couchdb_url + "transactions/_bulk_docs", json={"docs": docs})
    if response.status_code != 201:
        print(f"{failure_prefix}: {response.text}")
        return

    # A 201 only says the bulk request was processed. CouchDB returns one
    # result row per document, and rejected documents (for example update
    # conflicts) carry an "error" key instead of "ok".
    try:
        results = response.json()
    except ValueError:
        results = []
    rejected = [row for row in results if isinstance(row, dict) and "error" in row]

    if rejected:
        first = rejected[0]
        print(
            f"CouchDB rejected {len(rejected)} of {len(docs)} transactions "
            f"(first: id={first.get('id')}, error={first.get('error')}, "
            f"reason={first.get('reason')})"
        )
    else:
        print(success_message)


def upload_transaction_to_couchdb(transactions, batch_size=50000):
    """Upload transaction documents to the ``transactions`` database in batches.

    Documents are sent through CouchDB's ``_bulk_docs`` endpoint, ``batch_size``
    at a time, followed by one final request for any leftover documents. The
    outcome of every request is printed; nothing is returned and HTTP-level
    failures are reported rather than raised.

    Args:
        transactions: Iterable of JSON-serialisable documents (dicts).
        batch_size: Number of documents per bulk request.
    """
    # One session for the whole upload so the connection is reused between batches.
    with requests.Session() as session:
        session.auth = (username, password)

        pending = []
        for transaction in transactions:
            pending.append(transaction)

            # Flush as soon as a full batch has been collected.
            if len(pending) == batch_size:
                _post_bulk_docs(
                    session,
                    pending,
                    f"Successfully added batch of {batch_size} transactions to CouchDB.",
                    "Failed to add batch to CouchDB",
                )
                pending = []

        # Send whatever did not fill a complete batch.
        if pending:
            _post_bulk_docs(
                session,
                pending,
                f"Successfully added remaining {len(pending)} transactions to CouchDB.",
                "Failed to add remaining transactions to CouchDB",
            )

# Function to upload transactions with embedded album details
def upload_transactions_with_albums(transaction_file, transaction_album_file, albums_file, groups_file):
    """Build one document per transaction, with its albums embedded, and upload them.

    Each argument is handed to ``pd.read_csv``. Columns read from the files:

    * transactions: ``id``, ``customer_id``, ``transaction_date``, ``status``
    * transaction/album links: ``transaction_id``, ``album_id``
    * albums: ``id``, ``title``, ``stock``, ``price`` and ``group_id``
    * groups: ``id``, ``name``

    Missing values, including albums or groups that cannot be found, are
    replaced by ``handle_nan``'s default. The finished documents are passed to
    ``upload_transaction_to_couchdb``.

    Args:
        transaction_file: CSV of transactions.
        transaction_album_file: CSV linking transactions to albums.
        albums_file: CSV of albums.
        groups_file: CSV of groups.
    """
    transactions_data = pd.read_csv(transaction_file)
    transaction_albums_data = pd.read_csv(transaction_album_file)
    albums_data = pd.read_csv(albums_file)
    groups_data = pd.read_csv(groups_file)

    albums_dict = albums_data.set_index('id').to_dict(orient='index')
    groups_dict = groups_data.set_index('id').to_dict(orient='index')

    # Resolve every transaction's album ids once, instead of calling
    # get_group() for each transaction row.
    album_ids_by_transaction = {
        transaction_id: album_ids.tolist()
        for transaction_id, album_ids
        in transaction_albums_data.groupby('transaction_id')['album_id']
    }

    transactions_json = []

    for transaction in transactions_data.to_dict(orient='records'):
        transaction_id = int(transaction['id'])

        related_albums = []
        for album_id in album_ids_by_transaction.get(transaction_id, ()):
            # An album id with no row in the albums file yields an empty dict;
            # every field is then read with .get() so it degrades to the
            # handle_nan default instead of raising KeyError.
            album_info = albums_dict.get(int(album_id), {})

            # Compare against None explicitly so a group id of 0 is still looked up.
            group_id = album_info.get('group_id')
            group_name = None
            if group_id is not None and group_id in groups_dict:
                group_name = groups_dict[group_id]['name']

            related_albums.append({
                '_id': str(album_id),
                'name': handle_nan(album_info.get('title')),
                'group': handle_nan(group_name),
                'quantity': handle_nan(album_info.get('stock')),
                'price': handle_nan(album_info.get('price')),
            })

        transactions_json.append({
            # Use the normalised int id so the document id never ends up as "12.0".
            '_id': str(transaction_id),
            'customer_id': handle_nan(transaction['customer_id']),
            'transaction_date': handle_nan(transaction['transaction_date']),
            'status': handle_nan(transaction['status']),
            'albums': related_albums,
        })

    upload_transaction_to_couchdb(transactions_json)

# Run the import: read the four CSV exports and upload the transactions,
# each with its album details embedded.
upload_transactions_with_albums(
    transaction_file="transactions.csv",
    transaction_album_file="transaction_albums.csv",
    albums_file="albums.csv",
    groups_file="groups.csv",
)

  groups_data = pd.read_csv(groups_file)


Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchDB.
Successfully added batch of 50000 transactions to CouchD