In [1]:
import pandas as pd
# Load the pickled plotting data from a path relative to the working directory.
# Presumably a DataFrame with at least the columns used further down
# (x, y, z, subcorpus, word) -- TODO confirm against the producing script.
# NOTE(review): read_pickle unpickles the file; only point it at trusted data.
df_plot = pd.read_pickle("../data/large-data/df_plot.pkl")

In [10]:
import plotly.express as px

# 3D scatter of df_plot, one colour per subcorpus, hover text taken from "word".
fig = px.scatter_3d(
    df_plot,
    x="x",
    y="z",  # Flipped y and z axes: column "z" is drawn on the scene's y axis
    z="y",
    color="subcorpus",
    hover_name="word",
    title="t-SNE 3D visualization of word embeddings by subcorpus (independent t-SNE, axes flipped)",
    width=1200,
    height=900,
)

# Small markers keep the point cloud readable.
fig.update_traces(marker_size=3)

# Relabel the scene's y axis (which shows column "z") with subcorpus names.
# NOTE(review): assumes "z" holds the subcorpus index 0..3 in exactly this
# order -- confirm against the code that built df_plot.
fig.update_layout(
    scene_yaxis=dict(
        title="Subcorpus",
        tickvals=[0, 1, 2, 3],
        ticktext=["christian_0_300", "christian_300_600", "pagan_0_300", "pagan_300_600"],
    )
)
fig.show()

In [None]:
import pandas as pd
import pickle
import plotly.graph_objects as go
from ipywidgets import widgets, HBox, VBox, Output, interactive_output
from IPython.display import display

# Same file as loaded in the first cell; reloaded here, presumably so that this
# cell can be run on its own.
# NOTE(review): read_pickle unpickles the file; only point it at trusted data.
df_plot = pd.read_pickle("../data/large-data/df_plot.pkl")
# Distinct values of the "subcorpus" column as a plain list; used below as the
# dropdown options.
subcorpora = df_plot['subcorpus'].unique().tolist()

# Unpickled neighbour files keyed by path, so the file is read once per kernel
# session instead of on every widget interaction.
_NEIGHBORS_CACHE = {}


def load_neighbors(subcorpus, path="../data/large-data/word_neighbors.pkl"):
    """Return the neighbour table for one subcorpus.

    The pickle at ``path`` is expected to hold a mapping from subcorpus name to
    a per-word neighbour table. The whole file is unpickled on the first call
    for a given ``path`` and kept in ``_NEIGHBORS_CACHE``; later calls reuse the
    cached object. Re-run this cell or call ``_NEIGHBORS_CACHE.clear()`` after
    the file changes on disk.

    Args:
        subcorpus: Key to look up in the unpickled mapping.
        path: Location of the pickle file. Defaults to the notebook's data file.

    Returns:
        The entry stored under ``subcorpus``, or an empty dict if there is none.
        The returned object is shared with the cache, so callers should treat it
        as read-only.

    Raises:
        OSError: If ``path`` cannot be opened.
    """
    if path not in _NEIGHBORS_CACHE:
        with open(path, "rb") as f:
            # NOTE: pickle.load can execute arbitrary code; only use trusted files.
            _NEIGHBORS_CACHE[path] = pickle.load(f)
    return _NEIGHBORS_CACHE[path].get(subcorpus, {})

# Output widget that plot_filtered clears and then renders its figure and
# neighbour listing into; it is also embedded in the UI container below.
neighbor_out = Output()

def plot_filtered(word, subcorpus, n_neighbors=10, show_context=True):
    """Plot a word and its nearest neighbours for one subcorpus into ``neighbor_out``.

    Args:
        word: Query word. Leading/trailing whitespace (easy to type into the
            text box by accident) is ignored.
        subcorpus: Value of the ``subcorpus`` column of ``df_plot`` to restrict to;
            also the key passed to ``load_neighbors``.
        n_neighbors: Maximum number of neighbours to highlight and list.
        show_context: If true, every word of the subcorpus is drawn as a small
            grey dot behind the highlighted points.

    Returns:
        None. The figure and the text listing are rendered inside ``neighbor_out``.
    """
    word = word.strip()
    sub_df = df_plot[df_plot['subcorpus'] == subcorpus]

    # Entries are unpacked as (neighbour, similarity) pairs. Slice once and
    # reuse the result for both the highlighted points and the printed listing.
    top_neighbors = load_neighbors(subcorpus).get(word, [])[:n_neighbors]
    highlight_words = [word] + [n for n, _ in top_neighbors]
    filtered = sub_df[sub_df['word'].isin(highlight_words)]

    # NOTE(review): column "y" is plotted on the x axis and column "x" on the
    # y axis, while the axis titles read "dim 1" / "dim 2" -- confirm that this
    # swap is intentional.
    fig = go.Figure()
    if show_context:
        fig.add_trace(go.Scatter(
            x=sub_df['y'], y=sub_df['x'],
            mode='markers', marker=dict(size=3, color='lightgray'),
            name='context', text=sub_df['word'],
            hoverinfo='text', showlegend=False
        ))
    fig.add_trace(go.Scatter(
        x=filtered['y'], y=filtered['x'],
        mode='markers+text', marker=dict(size=8, color='red'),
        name='neighbors', text=filtered['word'],
        hoverinfo='text', textposition='top center'
    ))
    fig.update_layout(
        title=f"t-SNE 2D for '{word}' and neighbors in {subcorpus}",
        xaxis_title="t-SNE dim 1", yaxis_title="t-SNE dim 2",
        width=900, height=700,
    )

    # wait=True defers the clear until new output arrives.
    neighbor_out.clear_output(wait=True)
    with neighbor_out:
        fig.show()
        if not top_neighbors:
            # Tell the user why the listing is empty instead of printing a bare header.
            print(f"No neighbors found for '{word}' in {subcorpus}.")
            return
        print(f"Nearest neighbors in {subcorpus}:")
        for i, (n, sim) in enumerate(top_neighbors, 1):
            print(f"{i}. {n} (similarity: {sim:.3f})")

# Input controls: query word, subcorpus selector, neighbour count, context toggle.
word_widget = widgets.Text(
    value='ἐχθρός',
    description='Word:',
)
subcorpus_widget = widgets.Dropdown(
    options=subcorpora,
    value=subcorpora[0],
    description='Subcorpus:',
)
neighbors_widget = widgets.IntSlider(
    value=10,
    min=1,
    max=30,
    description='Neighbors:',
)
context_widget = widgets.Checkbox(
    value=True,
    description='Show context dots',
)

# Layout: one row of controls with the plot/listing output underneath.
ui = VBox([
    HBox([word_widget, subcorpus_widget, neighbors_widget, context_widget]),
    neighbor_out,
])

# Call plot_filtered again whenever any control changes; each key is the name
# of the plot_filtered parameter the widget feeds.
out = interactive_output(
    plot_filtered,
    dict(
        word=word_widget,
        subcorpus=subcorpus_widget,
        n_neighbors=neighbors_widget,
        show_context=context_widget,
    ),
)
display(ui, out)

VBox(children=(HBox(children=(Text(value='ἐχθρός', description='Word:'), Dropdown(description='Subcorpus:', op…

Output()