# Interactive Walker Dictionary

## Source code Section

In [1]:
import pandas as pd
import re
from datetime import datetime
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display, clear_output

# Folder and file name of the normalised Walker dictionary export.
pathDicoNormalised = "./../../datas/03-dicos-normalised/1809-1827_Walker-J/"
pathCsvFile = pathDicoNormalised + "1809-1827_Walker-J_Norm.csv"

# Load the semicolon-separated dictionary; 'headword' and 'idSuperEntry' are
# forced to str so pandas does not infer another dtype for them.
dfWalker = pd.read_csv(
    pathCsvFile,
    sep=";",
    encoding="utf-8",
    dtype={"headword": str, "idSuperEntry": str},
    low_memory=False,
)

In [2]:
# Output folder for exported dataframes; created on first run.
basePathOut = "./savedDf/"

Path(basePathOut).mkdir(parents=True, exist_ok=True)

# Log file listing every export done with the "save dataframe" button.
# NOTE: running this cell rewrites the log with headers only.
fileDfSavedDatas = basePathOut+"savedDf.csv"
# Columns are given as a list (not a set) so their order is deterministic;
# recent pandas versions also refuse a set here.
dfSavedDatas = pd.DataFrame(columns=["idFilename","hwdFilter","nbSyllFilter","nbOcc"])
dfSavedDatas.to_csv(fileDfSavedDatas,encoding="utf8", sep =";", index=False)

In [3]:
def saveDf(btn):
    """Export the currently filtered dataframe and log the export.

    Click handler for the "save dataframe" button. It reads the headword
    pattern and syllable range from ``wid_hwd`` / ``wid_nbsyll``, writes the
    filtered rows to a timestamped CSV and XLS file under ``basePathOut``,
    adds one row describing the export to the log CSV ``fileDfSavedDatas``
    and redraws that log inside ``outputSavedData``.

    Parameters
    ----------
    btn
        The button instance passed by ipywidgets on click (unused).
    """
    hwdFilt = wid_hwd.value
    nbSyllFilt = wid_nbsyll.value

    dfWalkerFiltered = filtDf(hwdFilt, nbSyllFilt)

    # The timestamp doubles as the identifier stored in the log.
    filenameDate = datetime.now().strftime("%d-%m-%Y_%I-%M-%S_%p")
    filenameCsvOut = basePathOut+"df_"+filenameDate+".csv"
    dfWalkerFiltered.to_csv(filenameCsvOut,encoding="utf8", sep =";", index=False)

    # NOTE(review): legacy .xls output needs an Excel writer engine that recent
    # pandas releases may no longer provide -- confirm against the installed
    # version, or consider switching to .xlsx.
    filenameXlsOut = basePathOut+"df_"+filenameDate+".xls"
    dfWalkerFiltered.to_excel(filenameXlsOut, index=True)

    with outputSavedData:
        dfSavedDatas = pd.read_csv(fileDfSavedDatas, sep=";", encoding="utf-8", dtype ={'headword':str,'idSuperEntry':str}, low_memory=False)
        # DataFrame.append was removed from pandas; concatenate a one-row
        # frame instead, which works on old and new versions alike.
        newLogRow = pd.DataFrame([{"idFilename":filenameDate,"hwdFilter":hwdFilt,"nbSyllFilter":nbSyllFilt, "nbOcc":str(len(dfWalkerFiltered))}])
        dfSavedDatas = pd.concat([dfSavedDatas, newLogRow], ignore_index=True)
        dfSavedDatas.to_csv(fileDfSavedDatas,encoding="utf8", sep =";", index=False)

        clear_output()
        display(dfSavedDatas)

# Button that triggers the export above.
wid_btnSaveDf = widgets.Button(description="save dataframe")
wid_btnSaveDf.on_click(saveDf)

In [4]:
def filtDf(strHwd, rangeSyll):
    """Return the rows of ``dfWalker`` matching a headword regex and a syllable range.

    Parameters
    ----------
    strHwd : str
        Regular expression searched (case-insensitively) in the ``hwdR``
        column; rows whose ``hwdR`` is missing never match.
    rangeSyll : sequence of two numbers
        ``(low, high)`` bounds for the ``nbSyll`` column, both inclusive.

    Returns
    -------
    pandas.DataFrame
        The filtered subset of ``dfWalker``.

    Raises
    ------
    re.error
        If ``strHwd`` is not a valid regular expression.
    """
    dfWalkerFiltered = dfWalker[dfWalker['hwdR'].str.contains(strHwd, na=False,flags=re.IGNORECASE, regex=True)]
    # `between` is inclusive on both ends by default; the boolean form
    # `inclusive=True` is rejected by recent pandas versions, so rely on the default.
    dfWalkerFiltered = dfWalkerFiltered[dfWalkerFiltered['nbSyll'].between(rangeSyll[0], rangeSyll[1])]
    return dfWalkerFiltered

def filterFct(strHwd, rangeSyll):
    """Display the filtered dictionary and its number of rows.

    Callback intended for ``widgets.interactive_output``: an empty pattern is
    treated as ``.*`` (match everything). If the pattern is not a valid
    regular expression (typically while it is still being typed), a short
    message is shown instead of a traceback.

    Parameters
    ----------
    strHwd : str
        Regular expression applied to headwords.
    rangeSyll : sequence of two numbers
        ``(low, high)`` bounds on the number of syllables.
    """
    if strHwd=="":
        strHwd = ".*"

    try:
        dfWalkerFiltered = filtDf(strHwd, rangeSyll)
    except re.error as err:
        # Incomplete patterns such as "o[" are expected during typing.
        display(widgets.Label("invalid regular expression : "+str(err)))
        return

    lnbocc = widgets.Label("nb. occ. : "+str(len(dfWalkerFiltered)))
    display(lnbocc)
    display(dfWalkerFiltered)

In [5]:
# Bounds of the syllable-count slider, taken from the data. Cast to int so
# that range() also works if the column was read as float.
nbSyllMin = int(dfWalker["nbSyll"].min())
nbSyllMax = int(dfWalker["nbSyll"].max())

optionsNbSyll = range(nbSyllMin, nbSyllMax+1)
wid_nbsyll = widgets.SelectionRangeSlider(
    options=optionsNbSyll,
    # `index` holds positions in `options`, not option values: select the
    # full range whatever the smallest syllable count is.
    index=(0, len(optionsNbSyll)-1),
    description='nb syll',
    disabled=False
)

In [6]:
# Text field holding the regular expression used to filter headwords.
wid_hwd = widgets.Text(
    value='^pre',
    placeholder='.*',
    description="Hwd",
)

In [7]:
# Output area in which saveDf redraws the log of exported dataframes.
outputSavedData = widgets.Output()

In [8]:
# Controls laid out on one row: headword pattern, syllable range, save button.
ui = widgets.HBox([wid_hwd, wid_nbsyll, wid_btnSaveDf])

# Call filterFct with the current widget values and capture what it displays.
out = widgets.interactive_output(
    filterFct,
    {'strHwd': wid_hwd, "rangeSyll": wid_nbsyll},
)

## User interface Section
### Mémo : expressions régulières pour le filtrage des headwords

* Afficher les mots qui commencent par pre : ^pre

* Afficher les mots qui finissent par ing : ing$

* Afficher les mots qui contiennent un o suivi d'une voyelle (a, e, i, o ou u) : o[aeiou]

* Afficher les mots de 3 lettres : ^.{3}$

* Afficher les mots de 3 à 5 lettres : ^.{3,5}$

In [9]:
# Show the controls followed by the filtered dataframe output.
display(ui, out)

HBox(children=(Text(value='^pre', description='Hwd', placeholder='.*'), SelectionRangeSlider(description='nb s…

Output()

### Saved data

In [10]:
# Show the area where saveDf displays the log of exported dataframes.
display(outputSavedData)

Output()