In [3]:
from supabase import create_client,Client
import os
# get environment variables from .env file
import os

def load_env(path='.env'):
    """Load ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Args:
        path: File to read. Defaults to ``.env`` in the current working
            directory, which is what the original implementation always used.

    Behaviour:
        * Blank lines and lines starting with ``#`` are skipped.
        * Lines without an ``=`` are skipped instead of raising ``ValueError``.
        * Only the first ``=`` splits key from value, so values may contain ``=``.
        * Whitespace around key and value is removed, and one pair of matching
          surrounding quotes (single or double) is stripped from the value.

    Raises:
        OSError: if ``path`` cannot be opened (e.g. the file does not exist).
    """
    with open(path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue
            key, separator, value = line.partition('=')
            if not separator:
                # Not an assignment; ignore rather than abort the whole load.
                continue
            key = key.strip()
            value = value.strip()
            # KEY="value" / KEY='value' -> value
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]
            if key:
                os.environ[key] = value

# Populate os.environ from the local .env file before reading any settings.
load_env()


# Read the Supabase connection settings from the environment.
# os.getenv returns None when a variable is not set, so both values may be
# None here if the .env file does not define them.
url: str = os.getenv("SUPABASE_URL")
key: str = os.getenv("SUPABASE_KEY")
# Module-level client shared by every store_* / load_data function below.
supabase = create_client(url, key)



In [10]:
# functions to load data
import json
import requests

def get_translations(count, timeout=30):
    """Fetch the translation listing and return its first ``count`` entries.

    Args:
        count: Number of entries to return from the start of the ``work`` list.
        timeout: Seconds to wait for the HTTP request before giving up. Without
            a timeout ``requests.get`` can block indefinitely.

    Returns:
        list: The first ``count`` items of the ``work`` array in the response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    url = 'https://read.84000-translate.org/translations.json?api-version=0.4.0'
    response = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of later with a JSON/KeyError.
    response.raise_for_status()
    works = response.json()
    # only return the required number of translations
    print (f"Processing {count}of {len(works['work'])} translations")
    return works['work'][:count]

def get_work(Work_id):
    """Load the JSON document for one work from the local data directory.

    Args:
        Work_id: Identifier used as the file stem; the file read is
            ``../../data-json/<Work_id>.json`` relative to the current
            working directory.

    Returns:
        The parsed JSON content of the file.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    link = f"../../data-json/{Work_id}.json"
    print("loading file",link)
    # get the json data from the local file
    # Decode explicitly as UTF-8: the platform default encoding is not UTF-8
    # everywhere and would corrupt or reject non-ASCII text.
    with open(link, 'r', encoding='utf-8') as file:
        work = json.load(file)
    return work

def make_title(work,WorkId):
    """Build the rows for the titles of a work.

    Reads ``work['work'][0]['title']`` and returns one dict per title with the
    keys ``work_uuid`` (set to ``WorkId``), ``type``, ``content``, ``language``
    and ``migrationId``.
    """
    rows = []
    for entry in work['work'][0]['title']:
        row = {
            'work_uuid': WorkId,
            'type': entry['titleType'],
            'content': entry['content'],
            'language': entry['language'],
            'migrationId': entry['titleMigrationId'],
        }
        rows.append(row)
    return rows

def make_passages(work,WorkId):
    """Build the rows for the passages of a work.

    Returns ``None`` when ``work`` has no ``'translation'`` key. Otherwise
    returns one dict per item of ``work['translation']['passage']`` with the
    keys ``work_uuid`` (set to ``WorkId``), ``xmlId``, ``parent``, ``label``,
    ``type``, ``sort`` and ``content``.
    """
    if 'translation' in work:
        rows = []
        for item in work['translation']['passage']:
            rows.append({
                'work_uuid': WorkId,
                'xmlId': item['xmlId'],
                'parent': item['parentId'],
                'label': item['passageLabel'],
                'type': item['segmentationType'],
                'sort': item['passageSort'],
                'content': item['content'],
            })
        return rows
    # nothing translated yet for this work
    return None

def make_work(translation,work):
    """Build the row for a work.

    Args:
        translation: Dict providing ``work[0]`` (with ``workId``, ``url``,
            ``workType`` and ``title``) plus the top-level ``publicationDate``,
            ``publicationStatus`` and ``publicationVersion`` fields. It is also
            stored whole under ``migrationJson``.
        work: Dict providing ``catalogueWorkIds``.

    Returns:
        dict with the keys ``xmlId``, ``url``, ``type``, ``toh``,
        ``publicationDate``, ``publicationStatus``, ``publicationVersion``,
        ``title`` (content of the first title) and ``migrationJson``.
    """
    first_title = translation['work'][0]['title'][0]['content']
    entry = translation['work'][0]

    row = {}
    row['xmlId'] = entry['workId']
    row['url'] = entry['url']
    row['type'] = entry['workType']
    row['toh'] = work['catalogueWorkIds']
    row['publicationDate'] = translation['publicationDate']
    row['publicationStatus'] = translation['publicationStatus']
    row['publicationVersion'] = translation['publicationVersion']
    row['title'] = first_title
    row['migrationJson'] = translation
    return row

In [9]:
# Smoke test: fetch the listing and print only its first entry.
print(get_translations(1))

Processing 1of 4297 translations
[{'workId': 'UT22084-001-001', 'workType': 'eft:translation', 'url': '/translation/UT22084-001-001.json?api-version=0.4.0&annotate=true', 'htmlUrl': 'https://read.84000.co/translation/UT22084-001-001.html', 'catalogueWorkIds': 'toh1-1'}]


In [5]:
# upsert into supabase
def store_work(work_dict):
    """Insert a work row, replacing any existing row with the same ``xmlId``.

    Looks up ``works`` by ``work_dict['xmlId']``. If a row is found, the first
    match is deleted before the new row is inserted.

    Args:
        work_dict: Row to insert into the ``works`` table; must contain ``xmlId``.

    Returns:
        The ``uuid`` of the inserted row, or ``None`` if the insert failed
        (the error is printed).

    Raises:
        Any exception from the lookup or the delete propagates to the caller;
        only the insert is guarded.
    """
    existing = supabase.table('works').select('uuid').eq('xmlId',work_dict['xmlId']).execute()
    if existing.data:
        print("⚠️ Work Already Exists, Delete Work and its Titles and Passages")
        # Only the works row is deleted here.
        # NOTE(review): titles/passages are presumably removed by an
        # ON DELETE CASCADE in the database schema — confirm.
        supabase.table('works').delete().eq('uuid',existing.data[0]['uuid']).execute()
    try:
        inserted = supabase.table('works').insert(work_dict).execute()
        # Read rows through `.data`, like the lookup above, instead of
        # unpacking the response object positionally.
        return inserted.data[0]['uuid']
    except Exception as e:
        print ("Error Occured",e)
        return None
    
    
def store_titles(titles_dicts):
    """Upsert title rows into the ``titles`` table.

    Args:
        titles_dicts: List of row dicts to upsert.

    Returns:
        The string ``"success"`` when the upsert succeeded; otherwise the
        caught exception object (callers compare the result to ``"success"``).
    """
    try:
        response = supabase.table('titles').upsert(titles_dicts).execute()
        # Returned rows are read through `.data` rather than by unpacking the
        # response object positionally.
        print("✅ Added titles",len(response.data))
        return "success"
    except Exception as e:
        print("Error Occured",e)
        return e
            
def store_passages(passage_dict):
    """Upsert passage rows into the ``passages`` table.

    Args:
        passage_dict: List of row dicts to upsert.

    Returns:
        The string ``"success"`` when the upsert succeeded; otherwise the
        caught exception object (callers compare the result to ``"success"``).
    """
    try:
        response = supabase.table('passages').upsert(passage_dict).execute()
        # Returned rows are read through `.data` rather than by unpacking the
        # response object positionally.
        print("✅ Added passages",len(response.data))
        return "success"
    except Exception as e:
        print("Error Occured",e)
        return e


In [6]:
from ipywidgets import IntProgress
from IPython.display import display
    

In [5]:
# Simple Executor (Single Thread)
# 1. get required  number of translations
# 2. Load the data into Supabase and get back the workId
# 3. Get the work data from the local file
# 4. Get the titles and passages
# 5. Create Title and passage dictionary with WorkId

def load_data(translation_count):
    """Load up to ``translation_count`` works into Supabase, one at a time.

    For each listing entry: read the work JSON from disk, store the work row,
    then build and store its titles and (if present) its passages. When any
    step raises, the error is recorded in the ``Error`` table and the work row
    stored during this iteration (if any) is deleted.

    NOTE: another cell in this notebook also defines ``load_data`` (the
    thread-pool version); whichever cell ran last is bound to the name.

    Args:
        translation_count: Number of listing entries to process.

    Returns:
        None. Progress is shown in an ``IntProgress`` bar and via ``print``.
    """
    f = IntProgress(min=0, max=translation_count) # instantiate the bar
    f.description = "Progress"
    f.bar_style = "info"
    display(f) # display the bar
    works = get_translations(translation_count)
    # The listing may hold fewer entries than requested; size the bar to the
    # real amount of work so it can actually complete.
    f.max = len(works)
    count = 0
    for position, work in enumerate(works, start=1):
        work_uuid = None
        # Resolved before the try so the error handler can always reference it.
        WorkId = work.get('workId')
        try:
            print("🔁 Processing current:",WorkId,"entry",position,"of",len(works))
            translation = get_work(WorkId)
            # make_work expects (full work JSON, listing entry). The arguments
            # were previously passed the other way round, which raised KeyError
            # for every work.
            work_dict = make_work(translation,work)
            print("☁️ Storing Work")
            work_uuid = store_work(work_dict)
            if work_uuid is not None:
                print("Work UUID",work_uuid)
                print("Loading Titles")
                titles_dicts = make_title(translation,work_uuid)
                print("Loading Passages")
                passage_dict = make_passages(translation,work_uuid)
                print("☁️ Storing Title")
                return_titles = store_titles(titles_dicts)
                if (return_titles != "success"):
                    raise Exception(return_titles)
                if (passage_dict):
                    print("Storing Passages")
                    return_passages = store_passages(passage_dict)
                    if (return_passages != "success"):
                        raise Exception(return_passages)
                else:
                    print("❌ No Passages to Store")
                # Count only works that were actually stored.
                count += 1
        except Exception as e:
            # Record the failure and roll back the work row stored above.
            try:
                print('‼️ error in',WorkId)
                # store error as string
                supabase.table('Error').upsert({'error':str(e),'xmlId':WorkId}).execute()
                if (work_uuid):
                    supabase.table('works').delete().eq('uuid',work_uuid).execute()
                    print('deleted',work_uuid)
            except IndexError:
                print("No Data to Delete")
            except Exception as cleanup_error:
                # A failed error-log/rollback must not abort the remaining works.
                print("Cleanup failed for",WorkId,cleanup_error)
        f.value += 1 # signal to increment the progress bar

    print("✅ Done",count,"works loaded")

    return


In [11]:
# Multi Threaded Executor
import concurrent.futures
from tqdm import tqdm
from ipywidgets import IntProgress, Output
from IPython.display import display, clear_output

# Define the function to process each work
def process_work(work, status_output, f):
    """Load a single work into Supabase; intended to run in a worker thread.

    Reads the work JSON from disk, stores the work row, then builds and stores
    its titles and (if present) its passages. When any step raises, the error
    is recorded in the ``Error`` table and the work row stored by this call
    (if any) is deleted.

    Args:
        work: Listing entry for the work; its ``workId`` selects the JSON file.
        status_output: Output widget that receives status lines via
            ``append_stdout``.
        f: Progress widget; its ``value`` is incremented once per call,
            whether the work succeeded or failed.

    Returns:
        None.
    """
    work_uuid = None
    # .get so a malformed entry is reported through the error path below
    # instead of raising before the try and being lost inside the future.
    WorkId = work.get('workId')
    try:
        status_output.append_stdout(f"🔁 Processing current: {WorkId} entry\n")
        translation = get_work(WorkId)
        work_dict = make_work(translation,work)
        status_output.append_stdout("☁️ Storing Work\n")
        work_uuid = store_work(work_dict)
        if work_uuid:
            status_output.append_stdout(f"Work UUID {work_uuid}\n")
            status_output.append_stdout("Loading Titles\n")
            titles_dicts = make_title(translation, work_uuid)
            status_output.append_stdout("Loading Passages\n")
            passage_dict = make_passages(translation, work_uuid)
            status_output.append_stdout("☁️ Storing Title\n")
            return_titles = store_titles(titles_dicts)
            if return_titles != "success":
                raise Exception(return_titles)
            if passage_dict:
                status_output.append_stdout("Storing Passages\n")
                return_passages = store_passages(passage_dict)
                if return_passages != "success":
                    raise Exception(return_passages)
            else:
                status_output.append_stdout("❌ No Passages to Store\n")
    except Exception as e:
        # Record the failure and roll back the work row stored above.
        try:
            status_output.append_stdout(f'‼️ error in {WorkId}\n')
            # Store error as string
            supabase.table('Error').upsert({'error': str(e), 'xmlId': WorkId}).execute()
            if work_uuid:
                supabase.table('works').delete().eq('uuid', work_uuid).execute()
                status_output.append_stdout(f'deleted {work_uuid}\n')
        except IndexError:
            status_output.append_stdout("No Data to Delete\n")
        except Exception as cleanup_error:
            # A failed error-log/rollback must not escape the worker silently.
            status_output.append_stdout(f"Cleanup failed for {WorkId}: {cleanup_error}\n")
    finally:
        # Advance the bar for failures too, otherwise it never completes.
        # NOTE(review): `+=` on the shared widget is a read-then-write from
        # several worker threads; an occasional lost update is possible.
        f.value += 1
    return

def load_data(translation_count, max_workers=5):
    """Load up to ``translation_count`` works into Supabase using a thread pool.

    Fetches the listing, then submits one ``process_work`` call per entry to a
    ``ThreadPoolExecutor`` and waits for all of them to finish.

    NOTE: another cell in this notebook also defines ``load_data`` (the
    single-threaded version); whichever cell ran last is bound to the name.

    Args:
        translation_count: Number of listing entries to process.
        max_workers: Maximum number of worker threads (default 5, the value
            that was previously hard-coded).

    Returns:
        None. Progress is shown in an ``IntProgress`` bar and an ``Output``
        widget.
    """
    works = get_translations(translation_count)

    # Size the bar to the entries actually returned, which may be fewer than
    # requested, so it can reach 100%.
    f = IntProgress(min=0, max=len(works))  # instantiate the global progress bar
    f.description = "Progress"
    f.bar_style = "info"
    display(f)  # display the global progress bar

    status_output = Output()
    display(status_output)

    # Run process_work in parallel with at most `max_workers` threads.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_work, work, status_output, f) for work in works]
        for future in concurrent.futures.as_completed(futures):
            # An exception raised inside a worker is stored on its future and
            # would otherwise never be seen; report it.
            error = future.exception()
            if error is not None:
                status_output.append_stdout(f"‼️ unhandled error in worker: {error!r}\n")

    status_output.append_stdout(f"✅ Done {len(works)} works loaded\n")




In [12]:
# Example usage: runs whichever load_data definition was executed last.
translation_count = 1  # or any number you need
load_data(translation_count)

Processing 1of 4297 translations


IntProgress(value=0, bar_style='info', description='Progress', max=1)

Output()

loading file ../../data-json/UT22084-001-001.json
⚠️ Work Already Exists, Delete Work and its Titles and Passages
✅ Added titles 11
✅ Added passages 1421
