In [1]:
import json
import math
import os
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
import requests

# CouchDB connection settings. Each value can be overridden through an
# environment variable so real credentials do not have to be written into this
# file; the fallbacks are the local-development defaults.
username = os.environ.get("COUCHDB_USER", "admin")
password = os.environ.get("COUCHDB_PASSWORD", "admin")
# Request paths are appended directly to this URL, so it is normalised to end
# with exactly one slash.
couchdb_url = os.environ.get("COUCHDB_URL", "http://127.0.0.1:5984/").rstrip("/") + "/"

# Helper that substitutes a default for missing (None / NaN) values
def handle_nan(value, default='Unknown'):
    """Return ``default`` when ``value`` is a missing scalar, else ``value``.

    Args:
        value: Any object. ``None`` and scalar missing markers recognised by
            ``pd.isna`` (such as float NaN) count as missing.
        default: Replacement returned for a missing value.

    Returns:
        ``default`` if ``value`` is missing, otherwise ``value`` unchanged.
        Non-scalar values (lists, arrays, ...) are returned unchanged.
    """
    if value is None:
        return default
    # pd.isna returns an array for list-like input, whose truth value is
    # ambiguous and raises; only ask it about scalars.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return default
    return value

def _post_idol_batch(docs, success_label, failure_label):
    """Send one batch of documents to the ``idols`` database and print the outcome.

    Args:
        docs: List of documents sent in a single ``_bulk_docs`` request.
        success_label: Text placed before the document count in the success
            message (``"batch of"`` or ``"remaining"``).
        failure_label: Text naming the batch in the failure message
            (``"batch"`` or ``"remaining idols"``).
    """
    response = requests.post(
        couchdb_url + "idols/_bulk_docs",
        json={"docs": docs},
        auth=(username, password),
    )
    if response.status_code != 201:
        print(f"Failed to add {failure_label} to CouchDB: {response.text}")
        return

    # A 201 only says the request was processed: _bulk_docs reports documents
    # it refused (for example update conflicts) as entries carrying an "error"
    # key in the response body, so inspect them before claiming success.
    try:
        results = response.json()
    except ValueError:
        results = []
    if not isinstance(results, list):
        results = []
    rejected = [item for item in results if isinstance(item, dict) and "error" in item]

    if rejected:
        first = rejected[0]
        print(
            f"Added {len(docs) - len(rejected)} of {len(docs)} idols to CouchDB; "
            f"{len(rejected)} rejected "
            f"(first error: {first.get('error')}: {first.get('reason')})."
        )
    else:
        print(f"Successfully added {success_label} {len(docs)} idols to CouchDB.")


def upload_idol_to_couchdb(idols, batch_size=50000):
    """Upload idol documents to the ``idols`` database in bulk batches.

    Documents are accumulated until ``batch_size`` of them are pending, then
    sent with one ``_bulk_docs`` request; whatever is left at the end is sent
    as a final, smaller batch. The outcome of every request is printed.

    Args:
        idols: Iterable of JSON-serialisable documents.
        batch_size: Number of documents per request. The batch is flushed when
            the pending count equals this value, so a value below 1 results in
            a single request containing every document.
    """
    pending = []
    for idol in idols:
        pending.append(idol)
        if len(pending) == batch_size:
            _post_idol_batch(pending, "batch of", "batch")
            pending = []

    # Send whatever did not fill a complete batch.
    if pending:
        _post_idol_batch(pending, "remaining", "remaining idols")

# Function to upload idols with embedded group and country details
def _embedded_reference(raw_id, lookup):
    """Build the ``{'_id', 'name'}`` sub-document for a referenced group or country.

    Args:
        raw_id: Foreign-key value taken from the idol row; may be NaN.
        lookup: Mapping of integer id to a row dict that has a ``'name'`` key.

    Returns:
        ``{}`` when ``raw_id`` is missing. Otherwise a dict holding the id as a
        string and the referenced name; the name falls back to ``handle_nan``'s
        default when the id has no entry in ``lookup`` or its name is missing.
    """
    if pd.isna(raw_id):
        return {}
    ref_id = int(raw_id)
    # An id that is absent from the lookup table must not abort the whole
    # import, so treat it like a row without a name.
    info = lookup.get(ref_id) or {}
    return {'_id': str(ref_id), 'name': handle_nan(info.get('name'))}


def upload_idols_with_groups_and_country(idol_file, group_file, country_file):
    """Read idols, groups and countries from CSV and upload denormalised idol documents.

    Each idol row becomes one document whose ``group`` and ``country`` fields
    embed the id and name of the referenced rows. All documents are then handed
    to ``upload_idol_to_couchdb``.

    Args:
        idol_file: CSV with the columns ``id``, ``stage_name``, ``full_name``,
            ``date_of_birth``, ``gender``, ``weight``, ``height``, ``group_id``
            and ``country_id``.
        group_file: CSV with at least the columns ``id`` and ``name``.
        country_file: CSV with at least the columns ``id`` and ``name``.
    """
    idols_data = pd.read_csv(idol_file)
    groups_dict = pd.read_csv(group_file).set_index('id').to_dict(orient='index')
    countries_dict = pd.read_csv(country_file).set_index('id').to_dict(orient='index')

    idols_json = []  # Documents to upload, one per idol row
    for _, idol in idols_data.iterrows():
        row = idol.to_dict()
        idols_json.append({
            # Go through int() so the id never renders as e.g. "12.0".
            '_id': str(int(row['id'])),
            'stage_name': handle_nan(row['stage_name']),
            'full_name': handle_nan(row['full_name']),
            'birthdate': handle_nan(row['date_of_birth']),
            'gender': handle_nan(row['gender']),
            'weight': handle_nan(row['weight']),
            'height': handle_nan(row['height']),
            'group': _embedded_reference(row['group_id'], groups_dict),
            'country': _embedded_reference(row['country_id'], countries_dict),
        })

    upload_idol_to_couchdb(idols_json)

# Run the import: build idol documents with embedded group and country details
# from the three CSV files (paths are relative to the current working
# directory) and upload them to CouchDB.
upload_idols_with_groups_and_country("idols.csv", "groups.csv", "countries.csv")

  groups_data = pd.read_csv(group_file)


Successfully added batch of 50000 idols to CouchDB.
Successfully added batch of 50000 idols to CouchDB.
Successfully added batch of 50000 idols to CouchDB.
Successfully added batch of 50000 idols to CouchDB.
Successfully added batch of 50000 idols to CouchDB.
Successfully added batch of 50000 idols to CouchDB.
Successfully added batch of 50000 idols to CouchDB.
Successfully added batch of 50000 idols to CouchDB.
Successfully added remaining 17114 idols to CouchDB.
