In [4]:
import pandas as pd
# Load the table that every plot below draws from; later cells read its
# "x", "y", "z", "word" and "subcorpus" columns.
# NOTE(review): read_pickle can execute arbitrary code stored in the file —
# only load pickles from a trusted source.
df_plot = pd.read_pickle("../data/large-data/df_plot.pkl")

In [None]:
import plotly.express as px

# Build, style and label the 3D scatter in one chained expression
# (update_traces / update_layout each hand back the same figure).
fig = (
    px.scatter_3d(
        df_plot,
        x="x",
        y="z",  # the "z" column is drawn on the scene's y-axis ...
        z="y",  # ... and the "y" column on its z-axis (axes deliberately swapped)
        color="subcorpus",
        hover_name="word",
        title="t-SNE 3D visualization of word embeddings by subcorpus (independent t-SNE, axes flipped)",
        width=1200,
        height=900,
    )
    # Small markers keep a dense cloud readable.
    .update_traces(marker={"size": 3})
    # Replace the numeric ticks on the scene's y-axis with subcorpus names.
    # NOTE(review): assumes the "z" column holds the subcorpus index 0-3 — confirm.
    .update_layout(
        scene={
            "yaxis": {
                "title": "Subcorpus",
                "tickvals": [0, 1, 2, 3],
                "ticktext": [
                    "christian_0_300",
                    "christian_300_600",
                    "pagan_0_300",
                    "pagan_300_600",
                ],
            }
        }
    )
)
fig.show()

In [None]:
import pickle
# Load the nearest-neighbour lookup used by plot_filtered below, where it is
# indexed as neighbors_dict[subcorpus][word] and sliced to the first n entries.
# NOTE(review): pickle.load can execute arbitrary code — only open trusted files.
with open("../data/large-data/word_neighbors.pkl", "rb") as f:
    neighbors_dict = pickle.load(f)

import plotly.graph_objects as go
from ipywidgets import widgets, interact
from IPython.display import display

def plot_filtered(word, subcorpus, n_neighbors=10):
    """Plot one subcorpus in 2D and highlight a word together with its neighbours.

    Reads the module-level ``df_plot`` table and ``neighbors_dict`` lookup.

    Args:
        word: Query word. It is always part of the highlighted set, even when
            it has no entry in ``neighbors_dict[subcorpus]``.
        subcorpus: Value of the ``subcorpus`` column selecting which rows to draw.
        n_neighbors: Maximum number of entries taken from the front of the
            word's neighbour list.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    # Filter to the selected subcorpus once and reuse it for every trace,
    # instead of rebuilding the same boolean mask for each column.
    sub_df = df_plot[df_plot['subcorpus'] == subcorpus]

    # The query word itself plus (at most) its first n_neighbors neighbours.
    neighbors = [word]
    if word in neighbors_dict.get(subcorpus, {}):
        neighbors += neighbors_dict[subcorpus][word][:n_neighbors]
    filtered = sub_df[sub_df['word'].isin(neighbors)]

    fig = go.Figure()
    # Plot all points of the subcorpus in gray for context.
    # The "y" column goes on the horizontal axis and "x" on the vertical one,
    # which is what the "(flipped)" axis titles refer to.
    fig.add_trace(go.Scatter(
        x=sub_df['y'],
        y=sub_df['x'],
        mode='markers',
        marker=dict(size=3, color='lightgray'),
        name='context',
        text=sub_df['word'],
        hoverinfo='text',
        showlegend=False
    ))
    # Highlight the query word and its neighbours with labelled red markers.
    fig.add_trace(go.Scatter(
        x=filtered['y'],
        y=filtered['x'],
        mode='markers+text',
        marker=dict(size=8, color='red'),
        name='neighbors',
        text=filtered['word'],
        hoverinfo='text',
        textposition='top center'
    ))
    fig.update_layout(
        title=f"t-SNE 2D for '{word}' and neighbors in {subcorpus}",
        xaxis_title="t-SNE dim 1 (flipped)",
        yaxis_title="t-SNE dim 2 (flipped)",
        width=900, height=700
    )
    fig.show()

# --- Interactive controls ---
# Free-text box holding the query word.
word_widget = widgets.Text(description='Word:', value='θεός', disabled=False)

# Drop-down over every distinct subcorpus in df_plot; starts on the first one.
subcorpus_widget = widgets.Dropdown(
    description='Subcorpus:',
    options=df_plot['subcorpus'].unique(),
    value=df_plot['subcorpus'].unique()[0],
    disabled=False,
)

# Slider for how many neighbours to highlight (1 to 30, default 10).
neighbors_widget = widgets.IntSlider(
    description='Neighbors:',
    min=1,
    max=30,
    step=1,
    value=10,
)

# Lay the controls out in a row and redraw the plot whenever one of them changes.
ui = widgets.HBox(children=[word_widget, subcorpus_widget, neighbors_widget])
out = widgets.interactive_output(
    plot_filtered,
    dict(
        word=word_widget,
        subcorpus=subcorpus_widget,
        n_neighbors=neighbors_widget,
    ),
)
display(ui, out)

In [None]:
import pandas as pd
import pickle
import plotly.graph_objects as go
from ipywidgets import widgets
from IPython.display import display

# Load the plotting table again (presumably so this cell is self-contained)
# and collect the distinct subcorpus labels as a plain list for the drop-down.
# NOTE(review): read_pickle can execute arbitrary code stored in the file —
# only load pickles from a trusted source.
df_plot = pd.read_pickle("../data/large-data/df_plot.pkl")
subcorpora = df_plot['subcorpus'].unique().tolist()

# Unpickled neighbour table, filled on the first load_neighbors() call.
# Re-running this cell resets it to None, which forces a fresh read from disk.
_NEIGHBORS_CACHE = None


def load_neighbors(subcorpus):
    """Return the word -> neighbours mapping stored for ``subcorpus``.

    The pickle file is read and deserialised only on the first call; later
    calls reuse the in-memory table. This matters because the function is
    invoked on every widget change, and re-reading the file each time makes
    the controls sluggish.

    Args:
        subcorpus: Key into the top-level mapping stored in the pickle.

    Returns:
        The entry stored under ``subcorpus``, or an empty dict when the key is
        absent. The returned object is shared with the cache, so callers
        should treat it as read-only.

    Raises:
        OSError: If the file cannot be opened on the first call.
    """
    global _NEIGHBORS_CACHE
    if _NEIGHBORS_CACHE is None:
        # NOTE(review): pickle.load can execute arbitrary code — only open trusted files.
        with open("../data/large-data/word_neighbors.pkl", "rb") as f:
            _NEIGHBORS_CACHE = pickle.load(f)
    return _NEIGHBORS_CACHE.get(subcorpus, {})

def plot_filtered(word, subcorpus, n_neighbors=10, show_context=True):
    """Draw a 2D scatter of one subcorpus, highlighting a word and its neighbours.

    Args:
        word: Query word; always included in the highlighted set.
        subcorpus: Value of the ``subcorpus`` column selecting the rows to draw.
        n_neighbors: Maximum number of neighbours taken from the front of the
            word's neighbour list.
        show_context: When true, every point of the subcorpus is also drawn as
            a small gray marker behind the highlighted ones.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    # Rows belonging to the chosen subcorpus.
    in_subcorpus = df_plot['subcorpus'] == subcorpus
    sub_rows = df_plot[in_subcorpus]

    # Words to highlight: the query itself plus up to n_neighbors neighbours.
    lookup = load_neighbors(subcorpus)
    highlighted = {word}
    if word in lookup:
        highlighted.update(lookup[word][:n_neighbors])
    hits = sub_rows[sub_rows['word'].isin(highlighted)]

    # Collect traces in drawing order: optional gray backdrop, then the red highlights.
    # The "y" column is plotted horizontally and "x" vertically (axes flipped).
    traces = []
    if show_context:
        backdrop = go.Scatter(
            x=sub_rows['y'],
            y=sub_rows['x'],
            text=sub_rows['word'],
            name='context',
            mode='markers',
            marker={'size': 3, 'color': 'lightgray'},
            hoverinfo='text',
            showlegend=False,
        )
        traces.append(backdrop)
    highlight = go.Scatter(
        x=hits['y'],
        y=hits['x'],
        text=hits['word'],
        name='neighbors',
        mode='markers+text',
        marker={'size': 8, 'color': 'red'},
        hoverinfo='text',
        textposition='top center',
    )
    traces.append(highlight)

    fig = go.Figure(data=traces)
    fig.update_layout(
        width=900,
        height=700,
        title=f"t-SNE 2D for '{word}' and neighbors in {subcorpus}",
        xaxis_title="t-SNE dim 1 (flipped)",
        yaxis_title="t-SNE dim 2 (flipped)",
    )
    fig.show()

# --- Interactive controls ---
# Free-text box holding the query word.
word_widget = widgets.Text(description='Word:', value='ἕκαστος', disabled=False)

# Drop-down over the subcorpus labels collected above; starts on the first one.
subcorpus_widget = widgets.Dropdown(
    description='Subcorpus:',
    options=subcorpora,
    value=subcorpora[0],
    disabled=False,
)

# Slider for how many neighbours to highlight (1 to 30, default 10).
neighbors_widget = widgets.IntSlider(
    description='Neighbors:',
    min=1,
    max=30,
    step=1,
    value=10,
)

# Toggle for the gray background points.
context_widget = widgets.Checkbox(
    description='Show context dots',
    value=True,
    disabled=False,
)

# Lay the controls out in a row and redraw the plot whenever one of them changes.
ui = widgets.HBox(
    children=[word_widget, subcorpus_widget, neighbors_widget, context_widget]
)
out = widgets.interactive_output(
    plot_filtered,
    dict(
        word=word_widget,
        subcorpus=subcorpus_widget,
        n_neighbors=neighbors_widget,
        show_context=context_widget,
    ),
)
display(ui, out)