## Working page leafthrough

In [None]:
import gradio as gr

def paginate(page, max_page, page_change):
    """Move ``page`` by ``page_change`` and keep the result inside the valid range.

    The lower bound of 1 is applied first and the ``max_page`` cap second, so
    the cap wins when ``max_page`` itself is below 1.
    """
    return min(max(1, page + page_change), max_page)

def paginate_go(page, max_page):
    """Jump straight to ``page``, clamped to the ``1..max_page`` range.

    Args:
        page: Requested page; anything ``int()`` accepts (int, float, numeric string).
        max_page: Highest page that may be returned.

    Returns:
        The clamped page number, or None when ``page`` cannot be converted
        to an integer (for example None or non-numeric text).
    """
    try:
        page = int(page)
    except (TypeError, ValueError):
        # int(None) raises TypeError while int("abc") raises ValueError;
        # both mean "no usable page number".
        print(f'Invalid page number: {page}')
        return None
    return paginate(page, max_page, 0)
def create_gradio_app():
    """Build the pagination demo UI and return the ``gr.Blocks`` app.

    The app has a page number box, a max-page box, Go/Next/Previous buttons
    and a label that mirrors the current page number.
    """
    with gr.Blocks() as app:
        with gr.Row():
            page_input = gr.Number(label="Page Number", value=1)
            max_page = gr.Number(label="Max Page", value=10)  # Assuming max 10 pages for testing
            go_button = gr.Button("Go to Page")
            next_button = gr.Button("Next")
            prev_button = gr.Button("Previous")
            current_page_label = gr.Label("Current Page: 1")

        def update_page_label(page):
            """Return the label text for ``page``; Gradio writes it to the output component."""
            return f"Current Page: {page}"

        go_button.click(fn=paginate_go, inputs=[page_input, max_page], outputs=page_input)
        next_button.click(fn=lambda x, y: paginate(x, y, 1), inputs=[page_input, max_page], outputs=page_input)
        prev_button.click(fn=lambda x, y: paginate(x, y, -1), inputs=[page_input, max_page], outputs=page_input)
        # The handler needs explicit inputs/outputs: without them it receives no
        # page value and nothing it computes reaches the label.
        page_input.change(fn=update_page_label, inputs=page_input, outputs=current_page_label)

    return app

# Build the Blocks app with the factory defined in this cell, then launch it.
app = create_gradio_app()
app.launch()

## Working: going through audio files across different pages

In [None]:
import gradio as gr
import math
import os

# Items per page; used as the divisor when the page count is computed.
# NOTE(review): the name mentions images although this cell pages through audio files.
IMAGES_TO_SHOW = 1  # Set to 1 since we're showing one audio file per page
# Replaced wholesale by handle_audio_load and read by the display/pagination helpers.
loaded_audios = []  # Global variable to store the loaded audios

def paginate(page, max_page, page_change):
    """Offset ``page`` by ``page_change`` and clamp the result.

    The floor of 1 is applied before the ``max_page`` ceiling, so the ceiling
    takes precedence when ``max_page`` is smaller than 1.
    """
    return min(max(1, page + page_change), max_page)

def get_audio_for_page(all_audios, page):
    """Return the audio shown on the 1-based ``page``, or None if there is none.

    Args:
        all_audios: Sequence of loaded audios; None or empty means nothing is loaded.
        page: 1-based page number. Whole-number floats such as ``2.0`` are
            accepted because a numeric input box may deliver floats.

    Returns:
        ``all_audios[page - 1]`` when ``page`` is a whole number within
        ``1..len(all_audios)``; otherwise None.
    """
    if not all_audios or page is None:
        return None
    try:
        page_number = int(page)
    except (TypeError, ValueError, OverflowError):
        return None
    if page_number != page:
        # A fractional page such as 2.5 does not designate any page.
        return None
    if 1 <= page_number <= len(all_audios):
        # Index with the int: a float index would raise TypeError.
        return all_audios[page_number - 1]
    return None  # Return None if the page is out of range

def update_audio_display(page):
    """Compute the four values the UI shows for ``page``.

    Returns a tuple of (audio for the page or None, its base file name or "",
    the page as given, the "Current Page: page/max" label text). The audios
    are read from the module-level ``loaded_audios`` list.
    """
    total_pages = math.ceil(len(loaded_audios) / IMAGES_TO_SHOW)
    current_audio = get_audio_for_page(loaded_audios, page)
    if current_audio:
        shown_name = os.path.basename(current_audio)
    else:
        shown_name = ""
    return current_audio, shown_name, page, f"Current Page: {page}/{total_pages}"

def handle_pagination(page, delta):
    """Move ``delta`` pages from ``page`` and return the refreshed display values.

    Args:
        page: Current page from the page number box; None (an emptied box)
            is treated as page 1.
        delta: Number of pages to move; negative values go backwards.

    Returns:
        The tuple produced by ``update_audio_display`` for the clamped page.
    """
    if page is None:
        # An empty number box would otherwise make ``page + delta`` raise TypeError.
        page = 1
    max_page = math.ceil(len(loaded_audios) / IMAGES_TO_SHOW)
    new_page = paginate(page, max_page, delta)
    return update_audio_display(new_page)

def handle_audio_load(audios):
    """Store the newly loaded audios and show the first page.

    Args:
        audios: Files delivered by the file loader; None or an empty value
            (for example after the loader is cleared) resets the list to empty.

    Returns:
        The tuple produced by ``update_audio_display(1)``.
    """
    global loaded_audios
    # Normalise "nothing loaded" to an empty list so len() keeps working downstream.
    loaded_audios = list(audios) if audios else []
    return update_audio_display(1)  # Display first audio

def create_gradio_app():
    """Build the one-audio-per-page browser UI and return the ``gr.Blocks`` app.

    Every event refreshes the same four components: the audio player, the
    file name box, the page number box and the current-page label.
    """
    with gr.Blocks() as app:
        with gr.Row():
            audio_loader = gr.File(label="Load Audio Files", file_count='multiple')
            audio_player = gr.Audio(label="Audio Player")
            audio_name_box = gr.Textbox(label="Audio File Name", interactive=True)

        with gr.Row():
            page_input = gr.Number(label="Page Number", value=1, visible=True)
            current_page_label = gr.Label("Current Page: 1/X")
            go_button = gr.Button("Go to Page")
            prev_button = gr.Button("Previous")
            next_button = gr.Button("Next")

        # Shared output list, in the order update_audio_display returns its values.
        display_outputs = [audio_player, audio_name_box, page_input, current_page_label]

        audio_loader.change(fn=handle_audio_load, inputs=[audio_loader], outputs=display_outputs)
        next_button.click(fn=lambda page: handle_pagination(page, 1), inputs=[page_input], outputs=display_outputs)
        prev_button.click(fn=lambda page: handle_pagination(page, -1), inputs=[page_input], outputs=display_outputs)
        # A delta of 0 sends the typed page through the same clamping as Next/Previous,
        # so an out-of-range number lands on the nearest valid page.
        go_button.click(fn=lambda page: handle_pagination(page, 0), inputs=[page_input], outputs=display_outputs)

    return app

# Build the audio browser with the factory defined in this cell, then launch it.
app = create_gradio_app()
app.launch()


## Display audio files per page

In [None]:
import gradio as gr
import math
import os

# Items per page; used as the divisor when the page count is computed.
# NOTE(review): the name mentions images although this cell pages through audio files.
IMAGES_TO_SHOW = 1  # Set to 1 since we're showing one audio file per page
# Replaced wholesale by handle_audio_load and read by the display/pagination helpers.
loaded_audios = []  # Global variable to store the loaded audios

def paginate(page, max_page, page_change):
    """Return ``page + page_change`` limited to the allowed page range.

    The minimum of 1 is enforced first, then the ``max_page`` cap; the cap
    therefore prevails when ``max_page`` is below 1.
    """
    return min(max(1, page + page_change), max_page)

def get_audio_for_page(all_audios, page):
    """Look up the audio for a 1-based ``page``; return None when there is none.

    Args:
        all_audios: Sequence of loaded audios; None or empty means nothing is loaded.
        page: 1-based page number. Whole-number floats such as ``2.0`` are
            accepted because a numeric input box may deliver floats.

    Returns:
        ``all_audios[page - 1]`` when ``page`` is a whole number within
        ``1..len(all_audios)``; otherwise None.
    """
    if not all_audios or page is None:
        return None
    try:
        page_number = int(page)
    except (TypeError, ValueError, OverflowError):
        return None
    if page_number != page:
        # A fractional page such as 2.5 does not designate any page.
        return None
    if 1 <= page_number <= len(all_audios):
        # Index with the int: a float index would raise TypeError.
        return all_audios[page_number - 1]
    return None  # Return None if the page is out of range

def update_audio_display(page):
    """Assemble the values shown in the UI for ``page``.

    Returns a tuple of (audio for the page or None, its base file name or "",
    the page as given, the "Current Page: page/max" label text). The audios
    are read from the module-level ``loaded_audios`` list.
    """
    page_count = math.ceil(len(loaded_audios) / IMAGES_TO_SHOW)
    selected_audio = get_audio_for_page(loaded_audios, page)
    if selected_audio:
        file_name = os.path.basename(selected_audio)
    else:
        file_name = ""
    return selected_audio, file_name, page, f"Current Page: {page}/{page_count}"

def handle_pagination(page, delta):
    """Step ``delta`` pages away from ``page`` and return the refreshed display values.

    Args:
        page: Current page from the page number box; None (an emptied box)
            is treated as page 1.
        delta: Number of pages to move; negative values go backwards.

    Returns:
        The tuple produced by ``update_audio_display`` for the clamped page.
    """
    if page is None:
        # An empty number box would otherwise make ``page + delta`` raise TypeError.
        page = 1
    max_page = math.ceil(len(loaded_audios) / IMAGES_TO_SHOW)
    new_page = paginate(page, max_page, delta)
    return update_audio_display(new_page)

def handle_audio_load(audios):
    """Remember the newly loaded audios and display the first one.

    Args:
        audios: Files delivered by the file loader; None or an empty value
            (for example after the loader is cleared) resets the list to empty.

    Returns:
        The tuple produced by ``update_audio_display(1)``.
    """
    global loaded_audios
    # Normalise "nothing loaded" to an empty list so len() keeps working downstream.
    loaded_audios = list(audios) if audios else []
    return update_audio_display(1)  # Display first audio

def create_gradio_app():
    """Build the audio-per-page browser UI and return the ``gr.Blocks`` app.

    Every event refreshes the same four components: the audio player, the
    file name box, the page number box and the current-page label.
    """
    with gr.Blocks() as app:
        with gr.Row():
            audio_loader = gr.File(label="Load Audio Files", file_count='multiple')
            audio_player = gr.Audio(label="Audio Player")
            audio_name_box = gr.Textbox(label="Audio File Name", interactive=True)

        with gr.Row():
            page_input = gr.Number(label="Page Number", value=1, visible=True)
            current_page_label = gr.Label("Current Page: 1/X")
            go_button = gr.Button("Go to Page")
            prev_button = gr.Button("Previous")
            next_button = gr.Button("Next")

        # Shared output list, in the order update_audio_display returns its values.
        display_outputs = [audio_player, audio_name_box, page_input, current_page_label]

        audio_loader.change(fn=handle_audio_load, inputs=[audio_loader], outputs=display_outputs)
        next_button.click(fn=lambda page: handle_pagination(page, 1), inputs=[page_input], outputs=display_outputs)
        prev_button.click(fn=lambda page: handle_pagination(page, -1), inputs=[page_input], outputs=display_outputs)
        # A delta of 0 sends the typed page through the same clamping as Next/Previous,
        # so an out-of-range number lands on the nearest valid page.
        go_button.click(fn=lambda page: handle_pagination(page, 0), inputs=[page_input], outputs=display_outputs)

    return app

# Build the audio browser with the factory defined in this cell, then launch it.
app = create_gradio_app()
app.launch()



## Simplified script for managing one audio per page

In [None]:
import gradio as gr
import os

def create_gradio_app():
    """Build a one-audio-per-page browser and return the ``gr.Blocks`` app.

    The loaded files live in a closure variable instead of a module global.
    Navigation is clamped to the loaded range, so Next on the last audio and
    Previous on the first one stay where they are.
    """
    loaded_audios = []  # Stores the loaded audio files

    def to_page(value):
        """Coerce the page box value to an int page number, defaulting to 1."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            # None (emptied box) or a non-numeric value: fall back to the first page.
            return 1

    def change_audio(index):
        """Return the display values for the 0-based ``index``, clamped to the loaded range."""
        if not loaded_audios:
            return None, "", 1, "Audio not available"
        index = min(max(index, 0), len(loaded_audios) - 1)
        audio_file = loaded_audios[index]
        audio_name = os.path.basename(audio_file)
        label_text = f"Current Audio: {index + 1}/{len(loaded_audios)}"
        return audio_file, audio_name, index + 1, label_text

    def next_audio(index):
        """``index`` is the 1-based page on screen, which equals the 0-based index of the next audio."""
        return change_audio(to_page(index))

    def prev_audio(index):
        """Step back one audio: 1-based page minus 2 is the 0-based index of the previous one."""
        return change_audio(to_page(index) - 2)

    def go_to_audio(index):
        """Show the audio for the 1-based page typed in the page box."""
        return change_audio(to_page(index) - 1)

    def load_audios(audios):
        """Replace the loaded files (None or empty clears them) and show the first one."""
        nonlocal loaded_audios
        loaded_audios = list(audios) if audios else []
        return change_audio(0)

    with gr.Blocks() as app:
        audio_loader = gr.File(label="Load Audio Files", file_count='multiple')
        audio_player = gr.Audio(label="Audio Player")
        audio_name_box = gr.Textbox(label="Audio File Name", interactive=True)
        page_input = gr.Number(label="Go to page:", value=1, visible=True)
        current_page_label = gr.Label("Current Audio: 1/X")
        next_button = gr.Button("Next")
        prev_button = gr.Button("Previous")
        go_button = gr.Button("Go to Page")

        # Shared output list, in the order change_audio returns its values.
        display_outputs = [audio_player, audio_name_box, page_input, current_page_label]

        audio_loader.change(fn=load_audios, inputs=[audio_loader], outputs=display_outputs)
        next_button.click(fn=next_audio, inputs=[page_input], outputs=display_outputs)
        prev_button.click(fn=prev_audio, inputs=[page_input], outputs=display_outputs)
        go_button.click(fn=go_to_audio, inputs=[page_input], outputs=display_outputs)

    return app

# Build the simplified audio browser with the factory defined in this cell, then launch it.
app = create_gradio_app()
app.launch()

## Inputs and Outputs in Gradio

### Basics of inputs and outputs

In [None]:
import gradio as gr

def change_textbox2():
    """Return the fixed text destined for the second textbox."""
    second_line = 'So what does a fine person like you do in a place such as this?'
    return second_line

def change_textbox3():
    """Return the fixed text destined for the third textbox."""
    third_line = 'Yeah, I know. I love coding so much that I am still there at 10PM doing stuff like this instead of playing Elden Ring.'
    return third_line

# change_textboxes() is the function called by go_button in the interface. It can itself call other functions,
# which produce values or new components that we then return to the UI.

def change_textboxes(initial_text):
    """Produce the three texts written back to the UI.

    The first text greets the user with ``initial_text``; the other two come
    from the helper functions ``change_textbox2`` and ``change_textbox3``.
    """
    greeting = f'Oh so your name is {initial_text}! Nice to meet you!'
    return greeting, change_textbox2(), change_textbox3()

# UI: a name box, two plain textboxes and one button.
with gr.Blocks() as demo:
    textbox1 = gr.Textbox(label='What is your name?')
    textbox2 = gr.Textbox()
    textbox3 = gr.Textbox()
    go_button = gr.Button('Do your magic')
    # textbox1 is both the input and the first output, so the greeting
    # returned by change_textboxes overwrites the name the user typed.
    go_button.click(fn=change_textboxes, inputs=[textbox1], outputs=[textbox1, textbox2, textbox3])


demo.launch()



### Update the visibility of components

The function that you call through a button can also return new Gradio components, which are automatically mapped to the outputs defined in the UI (the `outputs` of `go_button.click`) and thus replace/update them. That is how you change the visibility of an element in Gradio.

In [None]:
import gradio as gr

def change_textboxes(initial_text):
    """Greet the user and reveal the two follow-up textboxes.

    Args:
        initial_text: Content of the name textbox.

    Returns:
        A tuple (value for the name textbox, textbox2 update, textbox3 update).
        With a non-blank name the first item is the greeting and both
        follow-up textboxes become visible with their messages. With a blank
        name the name textbox is left unchanged and both follow-ups are hidden.
    """
    # Check if the user has entered a name
    if not (initial_text or "").strip():
        # No name: do not greet an empty name, and keep the follow-ups hidden.
        hidden_textbox2 = gr.Textbox(value='', visible=False)
        hidden_textbox3 = gr.Textbox(value='', visible=False)
        return initial_text, hidden_textbox2, hidden_textbox3

    text1 = f'Oh so your name is {initial_text}! Nice to meet you!'
    # Return visible textboxes with the respective messages
    textbox2 = gr.Textbox(value='So what does a fine person like you do in a place such as this?', visible=True)
    textbox3 = gr.Textbox(value='Yeah, I know. I love coding so much that I am still there at 10PM doing stuff like this instead of playing Elden Ring.', visible=True)
    return text1, textbox2, textbox3

# UI: a name box, a button, and two textboxes that stay hidden until
# change_textboxes returns visible replacements for them.
with gr.Blocks() as demo:
    textbox1 = gr.Textbox(label='What is your name?')
    go_button = gr.Button('Do your magic')

    textbox2 = gr.Textbox(visible=False)  # Initially not visible
    textbox3 = gr.Textbox(visible=False)  # Initially not visible

    # The three returned values map, in order, onto textbox1, textbox2 and textbox3.
    go_button.click(fn=change_textboxes, inputs=textbox1, outputs=[textbox1, textbox2, textbox3])

demo.launch()

# Whisper and transcription

In [11]:
import whisper
import json
import os
import torchaudio


def transcribe_audio(audio_path, whisper_model):
    """Transcribe one audio file with Whisper.

    Args:
        audio_path: Path of the audio file passed to the model's ``transcribe``.
        whisper_model: Either the name of a Whisper model to load (for example
            ``'medium.en'``) or an already loaded model object exposing
            ``transcribe``. Passing a loaded model avoids reloading it for
            every file when many audios are processed.

    Returns:
        A dict with a single entry: the key is ``audio_path`` without its
        extension and the value is whatever ``transcribe`` returned.
    """
    # NOTE(review): the key keeps the directory part of the path; confirm
    # whether the bare file name was intended.
    audio_name = os.path.splitext(audio_path)[0]
    if isinstance(whisper_model, str):
        model = whisper.load_model(whisper_model)
    else:
        model = whisper_model
    result = model.transcribe(audio_path)
    return {audio_name: result}




# Name of the Whisper model handed to transcribe_audio.
whisper_model = 'medium.en'
# NOTE(review): hard-coded absolute path to a single local file; the name contains
# a U+FFFD replacement character, which looks like a mis-encoded character —
# confirm it matches the file on disk.
audio_file =  r"/home/maelys/AI_PROJECTS/SOUND/TOOLS/MRQ/ai-voice-cloning/training/train_mark_ultimate_8600/audio/But�_our_paths_soon_diverged_2_blood_guzzling.wav"

# One-entry dict keyed by the audio path without its extension.
transcription = transcribe_audio(audio_file, whisper_model)

# Serialise the result with 4-space indentation and write it to a file
# relative to the current working directory.
json_file_path = "json_test.json"
json_object = json.dumps(transcription, indent=4)

with open (json_file_path, "w") as outfile:
    outfile.write(json_object)



It seems I don't get exactly the same tokens or values as the MRQ ai-voice-cloning tool, although they are close. However, since I'm primarily building this tool to help me prepare datasets that I will then feed to MRQ's tool, I find it too risky to botch the transcription of thousands and thousands of audios, only to have to redo everything in a few months. What I'm going to do, then, is give the user of my tool a choice. In the transcription tab, "Transcribe here" will run the transcription internally with my tool. "Transcribe with MRQ" will make a textbox appear describing the exact process for transcribing the audios through MRQ's tool, and then invite the user to go to the transcription check tab. In that last tab, the user will simply point to the folder where the whisper.json has been created, whether internally or through MRQ.

## Transcribe panel

In [41]:
import gradio as gr


def internal_transcriber(input, model, export_path):
    """Placeholder for the built-in transcriber.

    The three arguments are not used yet; the function only returns a visible
    'Console output' textbox carrying a work-in-progress message.
    """
    console_box = gr.Textbox(value='This is WIP!', label='Console output', visible=True)
    return console_box

def choose_transcriber(transcriber_choice):
    """Return visibility updates for the two transcriber panels.

    The internal panel is shown when ``transcriber_choice`` equals
    'This tool'; for any other value the MRQ panel is shown instead.
    The result is ordered (internal panel, MRQ panel).
    """
    use_internal = transcriber_choice == 'This tool'
    internal_panel = gr.Group(visible=use_internal)
    mrq_panel = gr.Group(visible=not use_internal)
    return internal_panel, mrq_panel


# Transcribe panel: a radio picks the transcriber and reveals the matching panel.
with gr.Blocks() as demo:
    choice_radio = gr.Radio(label='Which tool do you want to use for transcribing?', choices=['This tool', 'MRQ ai-voice-cloning'])

    # Both panels start hidden; choose_transcriber makes exactly one of them visible.
    internal_transcriber_group = gr.Group(visible=False)
    mrq_tool_group = gr.Group(visible=False)

    with internal_transcriber_group:
        input_folder = gr.Textbox(label='Path to the folder you want to transcribe')
        model_choice = gr.Dropdown(label='Which Whisper model do you want to use?', 
                                            choices=["tiny", "tiny.en", "base", "base.en", "small", "small.en", 
                                                    "medium", "medium.en",
                                                    "large", "large-v1", "large-v2", ])
        export_path = gr.Textbox(label='Path to the folder you want to export your transcribed json')
        # Console output target of the Transcribe button; hidden until
        # internal_transcriber returns a visible replacement.
        info_textbox = gr.Textbox(visible=False)
        submit_button = gr.Button('Transcribe')

    with mrq_tool_group:
        instructions_text = """
      
                ># Hey there!
     
                >So you chose to use MRQ's ai-voice-cloning tool for your retranscription! Good choice, that tool is pure magic.

                >Here's how to do this:

                >>1. Go to MRQ ai-voice-cloning repo: [MRQ ai-voice-cloning](https://git.ecker.tech/mrq/ai-voice-cloning)
                2. Clone the repo, install the tool (you have all instructions on the git page)
                3. Put the voices you want to transcribe in a dedicated folder, inside the "voices" folder
                4. Launch the interface (start.bat or start.sh depending on your OS)
                5. Go to the "Training" tab
                6. Choose your voice in "Dataset Source"
                7. Click on Transcribe and Process
                8. The "whisper.json" is written in the "training" folder, so go get the path
                9. You're ready to go to the "Checkout Transcription Tab" here, point to the "whisper.json" file produced by MRQ ai-voice-cloning tool!
                
    """

        # Bound to its own name: reusing ``info_textbox`` here would rebind it
        # to this Markdown component, and the Transcribe button would then send
        # its console output into the MRQ instructions.
        instructions_markdown = gr.Markdown(value = instructions_text)

    choice_radio.change(fn=choose_transcriber, inputs=[choice_radio], outputs=[internal_transcriber_group, mrq_tool_group])
    submit_button.click(fn=internal_transcriber, inputs=[input_folder, model_choice, export_path], outputs=[info_textbox])



demo.launch()

Running on local URL:  http://127.0.0.1:7889

To create a public link, set `share=True` in `launch()`.


