# Samiske tegn

## Imports and setup

In [1]:
import pandas as pd
from pandas.io.formats.style import Styler
from collections import defaultdict
from string import punctuation

## Load CSV and add case information

In [2]:
letters = pd.read_csv("../data/common/samiske_bokstaver_med_sprak.csv")

#letters["er_stor_bokstav"] = letters["bokstav"].apply(str.isupper)
letters["lower"] = letters["bokstav"].apply(str.lower)

In [3]:
letters.tail()

Unnamed: 0,bokstav,unikode,nor,eng,sma,sme,smj,smn,sms,lower
96,ǯ,U+01EF,False,False,False,False,False,False,True,ǯ
97,ʒ,U+0292,False,False,False,False,False,False,True,ʒ
98,ʹ,U+02b9,False,False,False,False,False,False,True,ʹ
99,ʼ,U+02bc,False,False,False,False,False,False,True,ʼ
100,ˈ,U+02c8,False,False,False,False,False,False,True,ˈ


## Remove Norwegian and English letters

In [4]:
sami_letters = letters.query("not nor and not eng and (sma or sme or smj or smn or sms)")

language_codes = {
    "sme": "North Sámi",
    "sma": "South Sámi",
    "smj": "Lule Sámi",
    "smn": "Inare Sámi",
    "sms": "Skolt Sámi",
}

In [5]:
sami_letters.head()

Unnamed: 0,bokstav,unikode,nor,eng,sma,sme,smj,smn,sms,lower
52,Á,U+00C1,False,False,False,True,True,True,False,á
53,Â,U+00C2,False,False,False,False,False,True,True,â
54,Ä,U+00C4,False,False,False,False,False,True,True,ä
57,Ï,U+00CF,False,False,True,False,False,False,False,ï
58,Õ,U+00D5,False,False,False,False,False,False,True,õ


## Setup table where upper- and lowercased letters are together

In [77]:
combined_letters = defaultdict(list)

for lang_code, lang_name in language_codes.items():

    for lowercase in sami_letters["lower"].unique():        
        uppercase = lowercase.upper()

        # Get the uppercase version (if there are any)
        uppercase_query = letters.query(f"bokstav == '{uppercase}'")
        uppercase_in_language = (not uppercase_query.empty) and uppercase_query[lang_code].item()

        # Get the lowercase version (if there are any)
        lowercase_query = letters.query(f"bokstav == '{lowercase}'")
        lowercase_in_language = (not lowercase_query.empty) and lowercase_query[lang_code].item()

        combined = ""
        if uppercase_in_language:
            combined += uppercase
        if lowercase_in_language and uppercase != lowercase:
            combined += lowercase
        combined_letters[lang_name].append(combined)

table = pd.DataFrame(combined_letters)
table

Unnamed: 0,North Sámi,South Sámi,Lule Sámi,Inare Sámi,Skolt Sámi
0,Áá,,Áá,Áá,
1,,,,Ââ,Ââ
2,,,,Ää,Ää
3,,Ïï,,,
4,,,,,Õõ
5,,Öö,,,
6,Čč,,,Čč,Čč
7,Đđ,,,Đđ,Đđ
8,Ŋŋ,,Ŋŋ,Ŋŋ,Ŋŋ
9,Šš,,,Šš,Šš


## Export table to LaTeX

In [84]:
def make_cell(s):
    return r"\makecell{" + s + "}"
    
def make_multiline_cell(s):
    return make_cell(r"\\".join(s.split()))

styler = Styler(table.rename(columns=make_multiline_cell))
styler.hide(axis="index")

styler.apply(get_style, axis=1)
print(styler.to_latex(hrules=True, convert_css=True))
styler

\begin{tabular}{lllll}
\toprule
\makecell{North\\Sámi} & \makecell{South\\Sámi} & \makecell{Lule\\Sámi} & \makecell{Inare\\Sámi} & \makecell{Skolt\\Sámi} \\
\midrule
Áá &  & Áá & Áá &  \\
{\cellcolor[HTML]{DFDFDF}}  & {\cellcolor[HTML]{DFDFDF}}  & {\cellcolor[HTML]{DFDFDF}}  & {\cellcolor[HTML]{DFDFDF}} Ââ & {\cellcolor[HTML]{DFDFDF}} Ââ \\
 &  &  & Ää & Ää \\
{\cellcolor[HTML]{DFDFDF}}  & {\cellcolor[HTML]{DFDFDF}} Ïï & {\cellcolor[HTML]{DFDFDF}}  & {\cellcolor[HTML]{DFDFDF}}  & {\cellcolor[HTML]{DFDFDF}}  \\
 &  &  &  & Õõ \\
{\cellcolor[HTML]{DFDFDF}}  & {\cellcolor[HTML]{DFDFDF}} Öö & {\cellcolor[HTML]{DFDFDF}}  & {\cellcolor[HTML]{DFDFDF}}  & {\cellcolor[HTML]{DFDFDF}}  \\
Čč &  &  & Čč & Čč \\
{\cellcolor[HTML]{DFDFDF}} Đđ & {\cellcolor[HTML]{DFDFDF}}  & {\cellcolor[HTML]{DFDFDF}}  & {\cellcolor[HTML]{DFDFDF}} Đđ & {\cellcolor[HTML]{DFDFDF}} Đđ \\
Ŋŋ &  & Ŋŋ & Ŋŋ & Ŋŋ \\
{\cellcolor[HTML]{DFDFDF}} Šš & {\cellcolor[HTML]{DFDFDF}}  & {\cellcolor[HTML]{DFDFDF}}  & {\cellcolor[HTML]{

\makecell{North\\Sámi},\makecell{South\\Sámi},\makecell{Lule\\Sámi},\makecell{Inare\\Sámi},\makecell{Skolt\\Sámi}
Áá,,Áá,Áá,
,,,Ââ,Ââ
,,,Ää,Ää
,Ïï,,,
,,,,Õõ
,Öö,,,
Čč,,,Čč,Čč
Đđ,,,Đđ,Đđ
Ŋŋ,,Ŋŋ,Ŋŋ,Ŋŋ
Šš,,,Šš,Šš
