In [1]:
import json
import math
import os
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
import requests

# CouchDB connection settings. Each value may be overridden through an
# environment variable so real credentials do not have to be committed with
# the code; the fallbacks are the local development defaults.
username = os.environ.get("COUCHDB_USER", "admin")
password = os.environ.get("COUCHDB_PASSWORD", "admin")
# Request URLs are built by plain concatenation (couchdb_url + "albums/..."),
# so the base URL is normalised to end with exactly one slash.
couchdb_url = os.environ.get("COUCHDB_URL", "http://127.0.0.1:5984/").rstrip("/") + "/"

def handle_nan(value, default='Unknown'):
    """Return ``default`` in place of a missing value, otherwise ``value``.

    A value counts as missing when it is ``None``, a float NaN, or anything
    else that ``pd.isna`` reports as missing.
    """
    if value is None:
        return default
    if isinstance(value, float) and math.isnan(value):
        return default
    # Whatever other missing-value markers pandas recognises.
    if pd.isna(value):
        return default
    return value

def _send_album_batch(docs, success_message, failure_label, timeout):
    """POST one batch of documents to ``albums/_bulk_docs`` and print the outcome."""
    response = requests.post(
        couchdb_url + "albums/_bulk_docs",
        json={"docs": docs},
        auth=(username, password),
        timeout=timeout,
    )
    if response.status_code != 201:
        print(f"Failed to add {failure_label} to CouchDB: {response.text}")
        return

    # _bulk_docs answers 201 even when individual documents were rejected
    # (for example on update conflicts); each rejected document is reported
    # in the response body as an entry carrying an "error" key.
    try:
        results = response.json()
    except ValueError:
        results = []
    if not isinstance(results, list):
        results = []
    rejected = [
        entry for entry in results
        if isinstance(entry, dict) and "error" in entry
    ]

    if rejected:
        first = rejected[0]
        print(
            f"CouchDB rejected {len(rejected)} of {len(docs)} albums in this request "
            f"(first: id={first.get('id')!r}, error={first.get('error')!r}, "
            f"reason={first.get('reason')!r})."
        )
    else:
        print(success_message)


def upload_album_to_couchdb(albums, batch_size=50000, timeout=(10, 600)):
    """Upload album documents to the ``albums`` database in bulk requests.

    Documents are accumulated until exactly ``batch_size`` are pending, then
    sent in a single ``_bulk_docs`` request; whatever is left at the end is
    sent as a final, smaller request. The outcome of every request is printed.

    Args:
        albums: Iterable of JSON-serialisable album documents.
        batch_size: Number of documents per request. A value that is never
            reached (including zero or a negative number) results in a single
            request containing every document.
        timeout: Passed to ``requests.post``; a ``(connect, read)`` tuple in
            seconds, or ``None`` to wait indefinitely. The read timeout is
            generous because large batches can take a while to be processed.

    Raises:
        requests.RequestException: If a request cannot be completed
            (connection failure, timeout, unserialisable payload).
    """
    pending = []

    for album in albums:
        pending.append(album)

        if len(pending) == batch_size:
            _send_album_batch(
                pending,
                f"Successfully added batch of {batch_size} albums to CouchDB.",
                "batch",
                timeout,
            )
            # Start a fresh list for the next batch.
            pending = []

    # Send whatever did not fill a complete batch.
    if pending:
        _send_album_batch(
            pending,
            f"Successfully added remaining {len(pending)} albums to CouchDB.",
            "remaining albums",
            timeout,
        )

def upload_albums_with_groups_and_songs(album_file, group_file, song_file):
    """Build one document per album, with its group and songs embedded, and upload them.

    Args:
        album_file: CSV of albums. The columns ``id``, ``group_id``, ``title``,
            ``release_date``, ``genre``, ``stock`` and ``price`` are read.
        group_file: CSV of groups. The columns ``id`` and ``name`` are read.
        song_file: CSV of songs. Rows are attached to albums through their
            ``album_id`` column, which is dropped from the embedded song.

    Missing values in album, group and song fields are replaced through
    ``handle_nan``. An album whose ``group_id`` is missing or does not match
    any group gets an empty ``group`` object; an album without songs gets an
    empty ``songs`` list. The resulting documents are handed to
    ``upload_album_to_couchdb``.
    """
    albums_data = pd.read_csv(album_file)
    groups_data = pd.read_csv(group_file)
    songs_data = pd.read_csv(song_file)

    groups_dict = groups_data.set_index('id').to_dict(orient='index')
    songs_grouped = songs_data.groupby('album_id')
    album_ids_with_songs = songs_grouped.groups

    albums_json = []  # Documents to upload, one per album row

    for _, album in albums_data.iterrows():
        album_id = int(album['id'])

        # An album without a group has NaN in this column; int(NaN) would
        # raise, so treat it like an unknown group instead.
        raw_group_id = album['group_id']
        group_id = None if pd.isna(raw_group_id) else int(raw_group_id)
        group_info = groups_dict.get(group_id) if group_id is not None else None

        if group_info:
            group = {
                '_id': str(group_id),
                'name': handle_nan(group_info['name'])
            }
        else:
            group = {}

        # Embed the album's songs. NaN is not valid JSON, so song fields are
        # sanitised the same way as the album's own fields.
        if album_id in album_ids_with_songs:
            song_rows = songs_grouped.get_group(album_id).to_dict(orient='records')
            songs = [
                {
                    field: handle_nan(field_value)
                    for field, field_value in song.items()
                    if field != 'album_id'
                }
                for song in song_rows
            ]
        else:
            songs = []

        albums_json.append({
            # Use the normalised integer id so the document id matches the
            # key used to look up songs (avoids ids such as "5.0").
            '_id': str(album_id),
            'name': handle_nan(album['title']),
            'release_date': handle_nan(album['release_date']),
            'genre': handle_nan(album['genre']),
            'stock': handle_nan(album['stock']),
            'price': handle_nan(album['price']),
            'group': group,
            'songs': songs
        })

    upload_album_to_couchdb(albums_json)

# Upload albums with embedded groups and songs. The guard keeps the CSV load
# and the upload from running when this module is merely imported; when run
# as a script or notebook cell, __name__ is "__main__" and the upload runs.
if __name__ == "__main__":
    upload_albums_with_groups_and_songs("albums.csv", "groups.csv", "songs.csv")

  groups_data = pd.read_csv(group_file)
  songs_data = pd.read_csv(song_file)


Successfully added batch of 50000 albums to CouchDB.
Successfully added batch of 50000 albums to CouchDB.
Successfully added batch of 50000 albums to CouchDB.
Successfully added batch of 50000 albums to CouchDB.
Successfully added batch of 50000 albums to CouchDB.
Successfully added batch of 50000 albums to CouchDB.
Successfully added batch of 50000 albums to CouchDB.
Successfully added batch of 50000 albums to CouchDB.
Successfully added batch of 50000 albums to CouchDB.
Successfully added remaining 11364 albums to CouchDB.
