In [1]:
import os
import random
import numpy as np

from colors import colors, convert_rgb_to_hex
from functions import *
import pandas as pd

import project_paths as pp
import gradio as gr

# Loading Words & Creating (Sampled) Vocabulary

In [2]:
# Read the word list from the project's datasets folder, one entry per line.
words_file_path = os.path.join(pp.datasets_folder_path, 'words.txt')
with open(words_file_path) as words_file:
    words = words_file.read().splitlines()

In [3]:
# Number of words drawn from the full word list to form the vocabulary.
vocab_size = 50

# Fixed seed so the same words are sampled on every run.
random.seed(26)
sampled_words = random.sample(words, vocab_size)
vocab = sorted(sampled_words)

# One integer counter per vocabulary word, all starting at zero.
vocab_frequencies = np.zeros(vocab_size, dtype=np.int16)

# Creating Random Logits Distribution

In [4]:
# Seed NumPy's global generator so the sampled logits are reproducible.
np.random.seed(1618)

# One logit per vocabulary word, drawn from a normal distribution
# with mean 0 and standard deviation 0.8.
original_logits = np.random.normal(loc=0, scale=0.8, size=vocab_size)

# Probabilities derived from the logits by the project helper.
original_pmf = convert_logits_to_probabilities(original_logits)

# Creating DataFrame

In [5]:
# Single frame backing every plot: one row per vocabulary word.
# The "effective" columns start out identical to the "original" ones.
df = pd.DataFrame(
    dict(
        vocab=vocab,
        vocab_frequencies=vocab_frequencies,
        original_logits=original_logits,
        effective_logits=original_logits,
        original_pmf=original_pmf,
        effective_pmf=original_pmf,
    )
)

# Creating Gradio App

In [6]:
logs = []


def _control_value(control):
    """Return ``control.value`` if the attribute exists, otherwise ``control``.

    Gradio calls event handlers with the components' current *values*
    (bool / int / float), not with the component objects, so the values must
    be used directly. Objects exposing ``.value`` are still accepted so that
    direct calls written against the previous version keep working.
    """
    return getattr(control, 'value', control)


def _axis_limits(values, pad=1.1):
    """Compute ``[low, high]`` y-axis limits for a bar plot of ``values``.

    Non-finite entries are ignored, the finite extremes are scaled by ``pad``
    and the range is widened to include zero.

    Returns ``None`` when no usable range exists (no finite values, or all
    values are zero).
    """
    finite = np.asarray(values, dtype=float)
    finite = finite[np.isfinite(finite)]
    if finite.size == 0:
        return None
    low = min(pad * float(finite.min()), 0.0)
    high = max(pad * float(finite.max()), 0.0)
    return [low, high] if low < high else None


def update_plot(
    repetition_penalty_checkbox,
    repetition_penalty_slider,
    frequency_penalty_checkbox,
    frequency_penalty_slider,
    temperature_checkbox,
    temperature_slider,
    top_k_checkbox,
    top_k_slider,
    top_p_checkbox,
    top_p_slider
):
    """Recompute the effective logits / PMF and refresh the two "effective" plots.

    Each ``*_checkbox`` argument enables the transformation whose parameter is
    given by the matching ``*_slider`` argument. Enabled transformations are
    applied in this order: repetition penalty, frequency penalty, temperature,
    top-k, top-p. The result is then converted to probabilities.

    The computation always starts from ``df['original_logits']``, so the result
    depends only on the current control values and not on earlier events.

    Side effects:
        Overwrites the ``effective_logits`` and ``effective_pmf`` columns of
        the module-level ``df``.

    Returns:
        A pair of ``gr.update`` payloads, in order, for the effective-logits
        bar plot and the effective-PMF bar plot. Each carries the refreshed
        frame and new y-axis limits.
    """
    repetition_penalty_enabled = _control_value(repetition_penalty_checkbox)
    repetition_penalty = _control_value(repetition_penalty_slider)
    frequency_penalty_enabled = _control_value(frequency_penalty_checkbox)
    frequency_penalty = _control_value(frequency_penalty_slider)
    temperature_enabled = _control_value(temperature_checkbox)
    temperature = _control_value(temperature_slider)
    top_k_enabled = _control_value(top_k_checkbox)
    top_k = _control_value(top_k_slider)
    top_p_enabled = _control_value(top_p_checkbox)
    top_p = _control_value(top_p_slider)

    # np.array copies, so the helpers below can never alter the original column.
    effective_logits = np.array(df['original_logits'], dtype=float)
    if repetition_penalty_enabled:
        effective_logits = apply_repetition_penalty(effective_logits, vocab_frequencies, repetition_penalty)
    if frequency_penalty_enabled:
        effective_logits = apply_frequency_penalty(effective_logits, vocab_frequencies, frequency_penalty)
    if temperature_enabled:
        effective_logits = apply_temperature(effective_logits, temperature)
    if top_k_enabled:
        # The slider may deliver a float; pass a whole number as the count.
        effective_logits = select_top_k(effective_logits, int(top_k))
    if top_p_enabled:
        effective_logits = select_top_p(effective_logits, top_p)
    effective_pmf = convert_logits_to_probabilities(effective_logits)

    df['effective_logits'] = effective_logits
    df['effective_pmf'] = effective_pmf

    # NOTE(review): select_top_k / select_top_p presumably mask rejected logits
    # with non-finite values - confirm in functions.py. _axis_limits ignores
    # such entries so the axis range stays valid either way.
    effective_logits_limits = _axis_limits(effective_logits)
    effective_pmf_limits = _axis_limits(effective_pmf)

    return (
        gr.update(value=df, y_lim=effective_logits_limits),
        gr.update(value=df, y_lim=effective_pmf_limits),
    )

In [7]:
# Sentinel: the app-building cell tests `app is not None` and closes the
# previously created app before constructing a new one.
app = None

In [8]:
# Shut down anything left over from a previous run of this cell so that
# re-running it does not leave stale servers behind.
gr.close_all()
if app is not None:
    app.close()


def _toggle_row(label, **slider_kwargs):
    """Create one row holding an enable checkbox and its parameter slider.

    Must be called inside an active ``gr.Blocks`` context.
    ``slider_kwargs`` (minimum, maximum, step, value) are forwarded to
    ``gr.Slider``.

    Returns:
        ``(checkbox, slider)``
    """
    with gr.Row():
        checkbox = gr.Checkbox(
            label=label,
            info='Check to enable, uncheck to disable',
            value=False,
            interactive=True,
            scale=1
        )
        slider = gr.Slider(
            interactive=True,
            show_label=False,
            scale=4,
            **slider_kwargs
        )
    return checkbox, slider


def _bar_plot(label, y, y_lim, y_title):
    """Create a bar plot of column ``y`` of ``df`` against the vocabulary words."""
    return gr.BarPlot(
        label=label,
        value=df,
        x='vocab',
        y=y,
        y_lim=y_lim,
        x_title='Word',
        y_title=y_title,
        x_label_angle=270,
        color=y,
        color_title=y_title,
        show_label=True
    )


def _show_word_frequency(word):
    """Return the stored frequency of ``word`` so the slider follows the dropdown."""
    if word not in vocab:
        return 0
    return int(vocab_frequencies[vocab.index(word)])


def _set_word_frequency(word, frequency, *control_values):
    """Store ``frequency`` for ``word`` and recompute the effective plots.

    ``control_values`` are the ten checkbox / slider values that
    ``update_plot`` expects, in its parameter order.
    """
    if word in vocab:
        vocab_frequencies[vocab.index(word)] = int(frequency)
        df['vocab_frequencies'] = vocab_frequencies
    return update_plot(*control_values)


with gr.Blocks() as app:
    gr.Markdown('# From Logits to Tokens')
    with gr.Row():
        with gr.Column(scale=5):
            vocab_dropdown = gr.Dropdown(
                label='Word',
                info='Select a word to change its frequency',
                choices=vocab,
                value=vocab[0],
                interactive=True
            )

            vocab_frequency_slider = gr.Slider(
                label='Word Frequency',
                info='Set the frequency of the above selected word',
                show_label=True,
                minimum=0,
                maximum=10,
                step=1,
                value=0,
                interactive=True
            )
            with gr.Column():
                repetition_penalty_checkbox, repetition_penalty_slider = _toggle_row(
                    'Repetition Penalty (RP)', minimum=0.01, maximum=5, step=0.01, value=1
                )
                frequency_penalty_checkbox, frequency_penalty_slider = _toggle_row(
                    'Frequency Penalty (FP)', minimum=0, maximum=10, step=0.01, value=0
                )
                # Temperature 0 is degenerate, so the slider starts strictly above
                # zero and defaults to the neutral value 1.
                temperature_checkbox, temperature_slider = _toggle_row(
                    'Temperature (T)', minimum=0.01, maximum=10, step=0.01, value=1
                )
                # k is bounded by the vocabulary size; k = 0 would keep no word.
                top_k_checkbox, top_k_slider = _toggle_row(
                    'Top K (top_k)', minimum=1, maximum=vocab_size, step=1, value=vocab_size
                )
                top_p_checkbox, top_p_slider = _toggle_row(
                    'Top P (top_p)', minimum=0, maximum=1, step=0.01, value=1
                )

        with gr.Column(scale=20):
            original_logits_limits = [df['original_logits'].min().item(), df['original_logits'].max().item()]
            effective_logits_limits = [df['effective_logits'].min().item(), df['effective_logits'].max().item()]

            original_logits_bar_plot = _bar_plot(
                'Original Logits', 'original_logits', original_logits_limits, 'Logits'
            )
            effective_logits_bar_plot = _bar_plot(
                'Effective Logits', 'effective_logits', effective_logits_limits, 'Logits'
            )

        with gr.Column(scale=20):
            original_pmf_limits = [0, df['original_pmf'].max().item()]
            effective_pmf_limits = [0, df['effective_pmf'].max().item()]

            original_pmf_bar_plot = _bar_plot(
                'Original Probability Mass Function (PMF)', 'original_pmf', original_pmf_limits, 'Probability'
            )
            effective_pmf_bar_plot = _bar_plot(
                'Effective Probability Mass Function (PMF)', 'effective_pmf', effective_pmf_limits, 'Probability'
            )

    # Every control feeds the same handler with the same inputs and outputs.
    # The order of control_inputs must match update_plot's parameter order.
    control_inputs = [
        repetition_penalty_checkbox,
        repetition_penalty_slider,
        frequency_penalty_checkbox,
        frequency_penalty_slider,
        temperature_checkbox,
        temperature_slider,
        top_k_checkbox,
        top_k_slider,
        top_p_checkbox,
        top_p_slider
    ]
    plot_outputs = [effective_logits_bar_plot, effective_pmf_bar_plot]

    checkboxes = (
        repetition_penalty_checkbox,
        frequency_penalty_checkbox,
        temperature_checkbox,
        top_k_checkbox,
        top_p_checkbox
    )
    sliders = (
        repetition_penalty_slider,
        frequency_penalty_slider,
        temperature_slider,
        top_k_slider,
        top_p_slider
    )

    for checkbox in checkboxes:
        checkbox.change(fn=update_plot, inputs=control_inputs, outputs=plot_outputs)
    # Sliders recompute on release only, so dragging does not fire a stream of
    # intermediate updates.
    for slider in sliders:
        slider.release(fn=update_plot, inputs=control_inputs, outputs=plot_outputs)

    # Word-frequency controls: picking a word shows its stored frequency, and
    # releasing the slider stores the new frequency and refreshes the plots.
    vocab_dropdown.change(
        fn=_show_word_frequency,
        inputs=vocab_dropdown,
        outputs=vocab_frequency_slider
    )
    vocab_frequency_slider.release(
        fn=_set_word_frequency,
        inputs=[vocab_dropdown, vocab_frequency_slider, *control_inputs],
        outputs=plot_outputs
    )

# share=True additionally requests a public share link for the local server.
app.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860

Could not create share link. Missing file: /Users/admodi/Library/Python/3.12/lib/python/site-packages/gradio/frpc_darwin_arm64_v0.3. 

Please check your internet connection. This can happen if your antivirus software blocks the download of this file. You can install manually by following these steps: 

1. Download this file: https://cdn-media.huggingface.co/frpc-gradio-0.3/frpc_darwin_arm64
2. Rename the downloaded file to: frpc_darwin_arm64_v0.3
3. Move the file to this location: /Users/admodi/Library/Python/3.12/lib/python/site-packages/gradio


