In [5]:
!pip install --upgrade kaleido

Collecting kaleido
  Using cached kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
Installing collected packages: kaleido
Successfully installed kaleido-0.2.1


In [1]:
import seaborn as sns
import pandas as pd
import plotly.express as px

# Systems

In [2]:
# Load the tab-separated system/language review table.
df_sys = pd.read_csv(
    "../data/review_tables/sys-lang.tsv",
    delimiter="\t",
)

In [3]:
# Count the non-null "System" entries for each language name, then order the
# languages from most to fewest systems. Result columns: "Language", "Count".
df_lang_cnt = (
    df_sys
    .groupby("Name")["System"]
    .count()
    .rename("Count")
    .rename_axis("Language")
    .reset_index()
    .sort_values("Count", ascending=False)
)

In [13]:
# Bar chart of the per-language system counts held in df_lang_cnt.
bar_layout = {
    # "title": "Number of Systems per Language",
    "font": {"size": 20, "family": "Times New Roman"},
    "margin": {"b": 7, "t": 5, "r": 5, "l": 5},
}

fig = px.bar(
    df_lang_cnt,
    x="Language",
    y="Count",
    text_auto=True,  # print each bar's value on the bar
    width=1000,
    height=500,
    color_discrete_sequence=["#8d9bb3"],
)
fig.update_layout(**bar_layout)

# Render inline, then export the same figure as a PDF.
fig.show()
fig.write_image("../data/img/systems-per-language.pdf")

In [None]:
# Preview the first five rows of the raw systems table.
df_sys.head(n=5)

Unnamed: 0,Language,System,Language Family,Language Branch,Language Subgroup,Writing System,Script/Alphabet,Name
0,en,QALL-ME,Indo-European,Germanic,North Sea Germanic,Alphabetical,Latin,English
1,en,N. Aggarwal,Indo-European,Germanic,North Sea Germanic,Alphabetical,Latin,English
2,en,QAKiS,Indo-European,Germanic,North Sea Germanic,Alphabetical,Latin,English
3,en,SWSNL,Indo-European,Germanic,North Sea Germanic,Alphabetical,Latin,English
4,en,UTQA,Indo-European,Germanic,North Sea Germanic,Alphabetical,Latin,English


In [53]:
# Sunburst of the systems table, nested as family -> branch -> language name.
fig = px.sunburst(
    df_sys,
    path=["Language Family", "Language Branch", "Name"],
    width=1150,
    height=800,
    color_discrete_sequence=["#8d9bb3", "#b38d8d", "#8db392"],
)

# Hand-placed call-out labels, added in the listed order.
# Each row is (text, x, y, ax, ay): x/y position the annotation and ax/ay offset
# the arrow tail (see plotly's layout.annotations reference for the units).
# Commented rows are call-outs that are currently disabled.
callouts = [
    ("Turkic", 0.62, 0.495, 220, -300),
    ("Common Turkic", 0.645, 0.49, 230, -270),
    ("Sino-Tibetan", 0.6, 0.425, 300, 70),
    # ("Sinitic", 0.69, 0.472, 280, -100),
    ("Afro-Asiatic", 0.62, 0.463, 320, 45),
    ("Dravidian", 0.62, 0.48, 320, -75),
    ("Southern", 0.69, 0.47, 300, -55),
    # ("Semitic", 0.75, 0.433, 240, 48),
    ("Graeco-Phrygian", 0.7, 0.323, 190, 58),
    ("Armenian", 0.68, 0.295, 190, 98),
]
for label, x_pos, y_pos, tail_dx, tail_dy in callouts:
    fig.add_annotation(
        text=label,
        xanchor="left",
        x=x_pos,
        y=y_pos,
        ax=tail_dx,
        ay=tail_dy,
    )

sunburst_layout = {
    # "title": "Distribution of language groups and branches supported by the KGQA systems",
    "font": {"size": 20, "family": "Times New Roman"},
    "uniformtext": {"minsize": 20, "mode": "hide"},
    "legend_title_text": "System Count",
    "margin": {"b": 5, "t": 5},
}
fig.update_layout(**sunburst_layout)

# Render inline, then export the same figure as a PDF.
fig.show()
fig.write_image("../data/img/language-families.pdf")