# Text analysis of the [*Libellus de Medicinalibus Indorum Herbis*](https://en.wikipedia.org/wiki/Libellus_de_Medicinalibus_Indorum_Herbis), or the Codex de la Cruz-Badiano (1552)

From the William Gates translation into English (1939), published by the Maya Society ([link](https://en.wikipedia.org/wiki/File:The_De_la_Cruz-Badiano_Aztec_Herbal_of_1552.pdf)).

______

### Import necessary modules

In [1]:
import pandas as pd # for working with dataframes
from os import listdir # for retrieving files from directory
from os.path import isfile, join # for retrieving files from directory
from pathlib import Path # for retrieving files from directory

### Read in subchapter texts and create a list of file names

In [2]:
data_dir = "./modified_texts/" # directory holding one text file per subchapter

# Sorted names of the regular files in data_dir.
# macOS's ".DS_Store" is filtered out rather than removed afterwards, so the
# cell no longer raises ValueError on machines where that file does not exist.
file_names = sorted(
    f for f in listdir(data_dir)
    if isfile(join(data_dir, f)) and f != ".DS_Store"
)

# Subchapter names: file names with the last four characters (".txt") dropped
subchapter_names = [f[:-4] for f in file_names]

# One string per file, in the same order as file_names, with every newline
# replaced by a space. The encoding is given explicitly so the result does not
# depend on the platform's default locale.
texts = [
    Path(data_dir + f).read_text(encoding="utf-8").replace('\n', ' ')
    for f in file_names
]

### Read in the data to verify presence of Náhuatl words in subchapters

In [3]:
# Load the table used to verify which names belong to which subchapter
df = pd.read_csv("./verify_subchapter.csv")

# Split the ";"-separated "subchapter" column into one column per entry;
# positions with no entry (None/NaN after the split) are filled with 0
subchapter = df["subchapter"].str.split(";", expand=True).fillna(0)

# Isolate the remaining columns as Series.
# NOTE: `type` shadows the Python builtin; the name is kept because the
# name-matching cell further down reads it as `type.iloc[...]`.
nahuatl, official_name, ID, type = (
    df[column] for column in ("nahuatl", "official_name", "ID", "type")
)

### Find names associated with chapters

In [4]:
import re # used only in this cell, to find chapter labels inside subchapter entries

# Results of the search: one element per subchapter, in the same order as
# subchapter_names
chapter_list = []   # chapter label with one leading zero removed
ID_list = []        # per chapter: ID of every matched row
type_list = []      # per chapter: type of every matched row
name_list = []      # per chapter: list of candidate names for every matched row
official_list = []  # per chapter: upper-cased official name of every matched row

# Flatten the split subchapter table once, in row-major order, into
# (row position, entry text) pairs. Doing this outside the chapter loop avoids
# re-reading every cell with .iloc for every chapter. Surrounding whitespace is
# stripped here so that the "does the entry contain a space" test below only
# sees interior spaces.
subchapter_entries = [
    (row_pos, str(value).strip())
    for row_pos, row in enumerate(subchapter.itertuples(index=False, name=None))
    for value in row
]

for subchapter_name in subchapter_names: # for each subchapter

    #######################################################
    ### FIND NÁHUATL NAMES ASSOCIATED WITH EACH CHAPTER ###
    #######################################################

    chapter = subchapter_name # get the current chapter name

    if chapter.startswith("0"): # remove one leading zero from chapter name
        chapter = chapter[1:]

    # The chapter label must not be the tail of a longer number: "1a" must not
    # match "11a" or "21a". Leading zeros written in the table ("01a") are
    # still accepted. This replaces the earlier substring test followed by the
    # removal of matches containing "1"+chapter, which only covered labels
    # prefixed by the digit 1.
    chapter_pattern = re.compile(r"(?<!\d)0*" + re.escape(chapter))

    # (row position, entry) for every table entry that mentions this chapter
    hits = [
        (row_pos, entry)
        for row_pos, entry in subchapter_entries
        if chapter_pattern.search(entry)
    ]

    #############################################
    ### FIND MATCHES TO NÁHUATL NAMES IN TEXT ###
    #############################################

    IDs = [] # ID for each matched entry
    types = [] # type for each matched entry
    official_names = [] # upper-cased official name for each matched entry
    unique_names = [] # de-duplicated candidate names for each matched entry

    for row_pos, entry in hits:

        # lower-cased náhuatl name(s) of the row; several names are separated
        # by ";" (with or without a following space)
        nahuatl_name = nahuatl.iloc[row_pos].lower()
        if ";" in nahuatl_name:
            pieces = [piece.strip() for piece in nahuatl_name.split(";")]
            # fall back to the unsplit name if splitting leaves nothing usable
            nahuatl_variants = [piece for piece in pieces if piece] or [nahuatl_name]
        else:
            nahuatl_variants = [nahuatl_name]

        # An entry containing a space is treated as "<alternate name> <label>":
        # everything before the last space is the alternate name, and it is
        # tried before the náhuatl name(s). Without a space there is no
        # alternate name and only the náhuatl name(s) are used.
        if " " in entry:
            candidates = [entry.rpartition(" ")[0]] + nahuatl_variants
        else:
            candidates = nahuatl_variants

        # dict.fromkeys removes duplicates while keeping insertion order, so
        # the candidate order (and therefore which name is later recorded as
        # the match) is the same on every run, unlike list(set(...)).
        unique_names.append(list(dict.fromkeys(candidates)))

        IDs.append(ID.iloc[row_pos])
        types.append(type.iloc[row_pos])
        # positional lookup, consistent with the other columns
        official_names.append(official_name.iloc[row_pos].upper())

    # store information to lists
    chapter_list.append(chapter)
    ID_list.append(IDs)
    type_list.append(types)
    name_list.append(unique_names)
    official_list.append(official_names)



### Check if names are in the text

In [5]:
# Rows of [chapter, ID, type, official name, name] for every entry, split by
# whether one of its candidate names occurs in the subchapter text
present_list = []
absent_list = []

# chapter_list, texts, name_list, ID_list, type_list and official_list are
# parallel lists with one element per subchapter
per_chapter = zip(chapter_list, texts, name_list, ID_list, type_list, official_list)

for curr_chapter, curr_txt, curr_names, curr_IDs, curr_types, curr_official in per_chapter:

    # within a chapter, the names/IDs/types/official names are parallel too
    per_entry = zip(curr_names, curr_IDs, curr_types, curr_official)

    for candidates, curr_ID, curr_type, official in per_entry:

        # first candidate name that occurs in the text (case-sensitive
        # substring search), or None when no candidate occurs
        found = next((name for name in candidates if name in curr_txt), None)

        if found is not None: # save presence/absence info
            present_list.append([curr_chapter, curr_ID, curr_type, official, found])
        else:
            # Nothing matched: record the last candidate that was tried, as
            # before. With an empty candidate list there is no such name; the
            # previous code then hit an undefined or stale loop variable, so
            # None is recorded instead.
            last_tried = candidates[-1] if candidates else None
            absent_list.append([curr_chapter, curr_ID, curr_type, official, last_tried])

print("Present matches:",len(present_list))
print("Absent matches:",len(absent_list))

Present matches: 525
Absent matches: 0


### Annotate and replace text

In [6]:
import re # used only in this cell, for single-pass replacement of matched names

present_df = pd.DataFrame(present_list, columns=["chapter","ID","type","official","match"]) # convert matches to dataframe

save_dir = "./annotated_texts/" # set directory to save annotated files to
Path(save_dir).mkdir(parents=True, exist_ok=True) # create it if missing, so writing cannot fail on a fresh checkout

chapters = present_df["chapter"].unique() # get the unique chapters

for curr_chapter in chapters: # for each subchapter

    # mask for current subchapter and remove duplicate rows
    chapter_df = present_df[present_df["chapter"]==curr_chapter].drop_duplicates().reset_index(drop=True)

    if len(curr_chapter)==2: # add zero if needed to rebuild the file name
        curr_chapter = "0"+curr_chapter

    print("### CHAPTER", curr_chapter, "###")

    # read in current subchapter, replacing newlines and double spaces with single spaces
    curr_txt = Path(data_dir+curr_chapter+".txt").read_text(encoding="utf-8").replace('\n', ' ')
    curr_txt = curr_txt.replace("  ", " ")

    # Map each matched name to its "[ID; type; OFFICIAL]" annotation.
    # setdefault keeps the first row's annotation when a name occurs in several
    # rows, which is what sequential replacement did (later rows found nothing
    # left to replace). Empty names are skipped: replacing "" would insert the
    # annotation between every character.
    annotations = {}
    for matched, entry_ID, entry_type, official in zip(
            chapter_df["match"], chapter_df["ID"], chapter_df["type"], chapter_df["official"]):
        if matched:
            annotations.setdefault(matched, "["+entry_ID+"; "+entry_type+"; "+official+"]")

    if annotations:
        # Replace every name in one pass, trying longer names first. A short
        # name that is part of a longer one can then no longer corrupt the
        # longer name, and inserted annotations are never searched again.
        longest_first = sorted(annotations, key=len, reverse=True)
        name_pattern = re.compile("|".join(re.escape(matched) for matched in longest_first))
        curr_txt = name_pattern.sub(lambda found: annotations[found.group(0)], curr_txt)

    print(curr_txt, "\n")

    # write the annotated text under the same file name in save_dir
    Path(save_dir+curr_chapter+".txt").write_text(curr_txt, encoding="utf-8")


### CHAPTER 01a ###
Curation of the head. The fruit of the [ID028; plant; ECA-PATLI] plant, the [ID049; plant; IZTAC OCO-XOCHITL], the [ID150; plant; TE-AMOXTLI], the precious stones the [ID266; stone; TETLAHUITL], [ID267; stone; IZTAC TLALLI], [ID268; stone; EZTETL], [ID269; stone; TE-MAMATLATZIN], bruised up together in frigid water, allay heat in the head, and if in hot water an excess of cold. They are applied three times a day, morning, noon and evening, to be wrapped about the neck and throat over the supporting tendons and throat nerves. For pains in the head let him eat onions in honey, let him not sit in the sun, nor labor, nor enter the baths. 

### CHAPTER 01b ###
Boils. The leaves of the [ID215; plant; TLATLANQUAYE] plant, root of the [ID198; plant; TLAL-AHUEHUETL], [ID220; plant; TLA-YAPALONI] and [ID092; plant; CHIPAUAC XIHUITL] plant, well macerated in the yolk of egg without water, will thoroughly cleanse out head boils; they are to be applied daily, morning, noon and e