In [2]:
import json
import pickle
import numpy as np
import requests
import plotly
import plotly.express as px
import pandas as pd
import matplotlib

In [3]:
# Load the dataset of positions of all words in 3D space
url = "https://raw.githubusercontent.com/CCS-ZCU/noscemus_ETF/master/data/coordinates3s_dict.pkl"
resp = requests.get(url)
coordinates3s_dict = pickle.loads(resp.content)

In [4]:
filtered_vocab_df = pd.read_json("https://raw.githubusercontent.com/CCS-ZCU/noscemus_ETF/master/data/filtered_vocab_df.json")
filtered_vocab_df.set_index("word", inplace=True)
#word_dict = filtered_vocab_df.apply(lambda row: "wordcount: " + str(row["subcorpus"]) + ", translation: " + row["transl"], axis=1).to_dict()

In [5]:
filtered_vocab_df.head(5)

Unnamed: 0_level_0,1501-1550,1551-1600,1601-1650,1651-1700,mean,in_lila_embeddings,in_lasla,in_operamaiora,transl
word,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
dico,42831,120825,41832,72221,69427.25,True,True,True,"say, call, tell"
omnis,33152,108530,46009,66387,63519.5,True,True,True,
facio,33323,109486,40134,53953,59224.0,True,True,True,"do, make, handle"
pars,32591,90667,43593,51249,54525.0,True,True,True,part
habeo,30884,87225,38386,57791,53571.5,True,True,True,"have, hold, possess, consider, think"


In [6]:
# The dataset has form of a dictionary, each key correspond to one of our six subcoropra:
coordinates3s_dict.keys()

dict_keys(['lasla', 'operamaiora', '1501-1550', '1551-1600', '1601-1650', '1651-1700'])

Values of each item correspond to word positions in 3D space. Thus each item consists of:
* `xs` - np.array of all x coordinates
* `ys` - np.array of all y coordinates
* `zs` - np.array of all z coordinates
* `words` - np.array of all words in the vocabulary
Thus, to extract positions of all word vectors in the 1651-1700 subcorpus, you have to run:

In [7]:
subcorpus = "1651-1700"
xs, ys, zs, words = coordinates3s_dict[subcorpus]
word_dict = filtered_vocab_df.apply(lambda row: "wordcount: " + str(row[subcorpus]) + ", translation: " + row["transl"], axis=1).to_dict()

For instance, of we want to get x,y,z coordinates of a _target_ word, we have to find its position in the `words` nd.array and subsequently use this position to index from `xs`, `ys`, `zs`.

In [8]:
target = "scientia"
i = np.where(words == target) # find the positional index
x, y, z = xs[i], ys[i], zs[i] # apply the positional index to navigate through xs and ys
print(x, y, z)

[0.33957076] [0.27663973] [0.5570164]


We can use something similar to get positions of multiple words at once:

In [9]:
wordlist = ["scientia", "sapientia", "cognitio", "disciplina"]
idx = [word[0] for word in enumerate(words) if word[1] in wordlist] # find the positional indeces
wordlist_xs, wordlist_ys, wordlist_zs = xs[idx], ys[idx], zs[idx] # extract xs and ys for words in the wordlist based on their positional indeces
print(wordlist_xs, wordlist_ys, wordlist_zs)

[0.33957076 0.48258787 0.3337054  0.35430148] [0.27663973 0.52705663 0.2713426  0.2833426 ] [0.5570164  0.7940444  0.5414133  0.57174116]


In [10]:
fig = px.scatter_3d(x=wordlist_xs, y=wordlist_ys, z=wordlist_zs, text=wordlist)

In [11]:
fig.update_layout(
    title='3D Scatter Plot of Wordlist',
    scene=dict(
        xaxis_title='X Axis',
        yaxis_title='Y Axis',
        zaxis_title='Z Axis'
    )
)

I think that the data in this shape (wordlist_xs, wordlist_ys, wordlist_zs, wordlist) can be plotted with `plotly` 3D scatter in a very straightforward way.

In [12]:
wordlist_file_url = "https://raw.githubusercontent.com/CCS-ZCU/noscemus_ETF/master/data/wordlist.json"
response = requests.get(wordlist_file_url)
wordlist = requests.get(wordlist_file_url).json()

In [13]:
# let's take a look at the first 10 words
wordlist[:10]

['cognitio',
 'disciplina',
 'notitia',
 'philosophia',
 'ars',
 'doctrina',
 'geometria',
 'mathematicus',
 'peritia',
 'studiosus']

Now we can obtain their coordinates the same way as above.
Once again, let start by choosing the positional data for the subcorpus we are interested in.



In [14]:
print(wordlist)

['cognitio', 'disciplina', 'notitia', 'philosophia', 'ars', 'doctrina', 'geometria', 'mathematicus', 'peritia', 'studiosus', 'eruditio', 'sapientia', 'res', 'mathematicarum', 'disco', 'institutio', 'cognitor', 'profiteor', 'moralis', 'addisco', 'studium', 'ueritas', 'ingenium', 'ignoratio', 'cultor', 'peritus', 'praeceptorum', 'theologia', 'cognosco', 'contemplatio', 'arcanus', 'praeceptum', 'artium', 'excolo', 'medicina', 'subsidium', 'omnis', 'perfectio', 'exercitatio', 'liberalis', 'dialecticus', 'opus', 'schola', 'doceo', 'meditatio', 'imperitia', 'medicinus', 'principium', 'ignoro', 'fateor', 'notio', 'propono', 'autoritas', 'necessarius', 'scholasticus', 'fundamentum', 'erudio', 'magistra', 'scriptum', 'scio', 'diuinus', 'felicitas', 'doctor', 'gloria', 'singularis', 'prudentia', 'naturalis', 'exercito', 'disputo', 'eruditus', 'perspicio', 'mereo', 'demonstro', 'agnosco', 'ingenius', 'inuentio', 'tia', 'utilitas', 'mysterium', 'politicus', 'utilis', 'intellegentia', 'professio', 

In [15]:
# now we can obtain their coordinates the same way as above:


#idx = [word[0] for word in enumerate(words) if word[1] in wordlist] # find the positional indeces
#wordlist_xs, wordlist_ys, wordlist_zs = xs[idx], ys[idx], zs[idx]

In [16]:
import plotly.graph_objects as go



hover_text = [word + ": " + word_dict[word] for word in wordlist]

fig = go.Figure(data=go.Scatter3d(
    x=wordlist_xs,
    y=wordlist_ys,
    z=wordlist_zs,
    mode='markers',
    marker=dict(
        size=5,
        color='purple',
        opacity=0.3
    ),
    text=hover_text,  # use mapped hover text
    hoverinfo='text',  # ensure only the text field is displayed on hover
))

fig.update_layout(
    title='Embeddings',
    scene=dict(
        xaxis_title='X Axis',
        yaxis_title='Y Axis',
        zaxis_title='Z Axis'
    ),
    hovermode='closest',
    showlegend=False
)

fig.show()

In [17]:
fig.write_html("./test.html")

In [19]:
pip install dash-ag-grid

Collecting dash-ag-grid
  Downloading dash_ag_grid-31.0.1-py3-none-any.whl.metadata (4.4 kB)
Collecting dash>=2 (from dash-ag-grid)
  Downloading dash-2.16.1-py3-none-any.whl.metadata (10 kB)
Collecting Flask<3.1,>=1.0.4 (from dash>=2->dash-ag-grid)
  Downloading flask-3.0.2-py3-none-any.whl.metadata (3.6 kB)
Collecting Werkzeug<3.1 (from dash>=2->dash-ag-grid)
  Downloading werkzeug-3.0.1-py3-none-any.whl.metadata (4.1 kB)
Collecting dash-html-components==2.0.0 (from dash>=2->dash-ag-grid)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash>=2->dash-ag-grid)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash>=2->dash-ag-grid)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting importlib-metadata (from dash>=2->dash-ag-grid)
  Downloading importlib_metadata-7.1.0-py3-none-any.whl.metadata (4.7 kB)
Collecting typing-extension



In [26]:
import dash_ag_grid as dag
from dash import Dash, html, dcc, Input, Output, callback, Patch
import pandas as pd

app = Dash(__name__)

df = wordlist

columnDefs = [{"word": col} for col in ['lasla', 'operamaiora', '1501-1550', '1551-1600', '1601-1650', '1651-1700']]

app.layout = html.Div(
    [
        html.Div('Quick Filter:'),
        dcc.Input(id="quick-filter-input", placeholder="filter..."),
        dag.AgGrid(
            id="quick-filter-simple",
            rowData=df.to_dict("records"),
            columnDefs=columnDefs,
            defaultColDef={"flex": 1},
            dashGridOptions={"animateRows": False}
        ),
    ]
)


@callback(
    Output("quick-filter-simple", "dashGridOptions"),
    Input("quick-filter-input", "value")
)
def update_filter(filter_value):
    newFilter = Patch()
    newFilter['quickFilterText'] = filter_value
    return newFilter


if __name__ == "__main__":
    app.run(debug=True)


AttributeError: 'list' object has no attribute 'to_dict'

In [25]:
#originál kódu
import dash_ag_grid as dag
from dash import Dash, html, dcc, Input, Output, callback, Patch
import pandas as pd

app = Dash(__name__)

df = pd.read_csv(
    "https://raw.githubusercontent.com/plotly/datasets/master/ag-grid/olympic-winners.csv"
)

columnDefs = [{"field": col} for col in ['athlete', 'country', 'sport', 'age']]

app.layout = html.Div(
    [
        html.Div('Quick Filter:'),
        dcc.Input(id="quick-filter-input", placeholder="filter..."),
        dag.AgGrid(
            id="quick-filter-simple",
            rowData=df.to_dict("records"),
            columnDefs=columnDefs,
            defaultColDef={"flex": 1},
            dashGridOptions={"animateRows": False}
        ),
    ]
)


@callback(
    Output("quick-filter-simple", "dashGridOptions"),
    Input("quick-filter-input", "value")
)
def update_filter(filter_value):
    newFilter = Patch()
    newFilter['quickFilterText'] = filter_value
    return newFilter


if __name__ == "__main__":
    app.run(debug=True)
