# This Jupyter notebook retrieves the true definitions of the words from the Merriam-Webster dictionary.

## We ended up using the `balderdash_words1.csv` file.


In [1]:
import os
from urllib.parse import quote

import pandas as pd
import requests

from tqdm import tqdm
from tqdm.notebook import tqdm  # NOTE: rebinds `tqdm`; the notebook variant is the one in effect below

from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process environment;
# the lookup loop later reads DICTIONARY_API_KEY from the environment via os.getenv.
load_dotenv()

# Register tqdm's progress-aware pandas methods (e.g. `progress_apply`).
tqdm.pandas()

In [2]:
# Path of the first word list, resolved against the current working directory.
balderdash_words1_path = os.path.join(
    os.getcwd(),
    "data",
    "balderdash_words1.csv",
)
balderdash_words1_path

'/Users/parsahejabi/Codebases/GitHub_Repositories/USC-CSCI699-HistoryofLanguage-AI_Balderdash/data/balderdash_words1.csv'

In [3]:
# Path of the second word list, resolved against the current working directory.
balderdash_words2_path = os.path.join(
    os.getcwd(),
    "data",
    "balderdash_words2.csv",
)
balderdash_words2_path

'/Users/parsahejabi/Codebases/GitHub_Repositories/USC-CSCI699-HistoryofLanguage-AI_Balderdash/data/balderdash_words2.csv'

In [4]:
# Read the first word list from disk and display it.
balderdash_words1_df = pd.read_csv(filepath_or_buffer=balderdash_words1_path)
balderdash_words1_df

Unnamed: 0,words,def,POS
0,wadmiltilt,['A strong rough woolen cloth employed to cove...,['noun']
1,scrivello,"[""An elephant's tusk of less than 20 pounds in...",['noun']
2,scapulimancy,['Divination by means of a shoulder-blade: sam...,"['noun', 'noun']"
3,queez-madam,"['The cuisse-madam, a French jargonelle pear.']",['noun']
4,progger,['One who progs; a rambling or aimless searche...,"['noun', 'noun']"
...,...,...,...
228,doromania,['An unusual urge or preoccupation with giving...,['noun']
229,pogonip,"['A frozen fog, formed in the coldest weather ...","['noun', 'noun', 'noun']"
230,oogonium,['Any of the cells that give rise through mito...,"['noun', 'noun', 'noun', 'noun', 'noun', 'noun..."
231,decussated,['Simple past tense and past participle of dec...,['verb']


In [5]:
# Read the second word list from disk and display it.
balderdash_words2_df = pd.read_csv(filepath_or_buffer=balderdash_words2_path)
balderdash_words2_df

Unnamed: 0,words,def
0,Abibliophobia,The fear of running out of reading material.
1,Absquatulate,To leave or abscond with something.
2,Allegator,Some who alleges.
3,Anencephalous,Lacking a brain.
4,Argle-bargle,A loud row or quarrel.
...,...,...
97,Unremacadamized,Having not been repaved with macadam.
98,Vomitory,An exit or outlet.
99,Wabbit,"Exhausted, tired, worn out."
100,Widdershins,In a contrary or counterclockwise direction.


In [6]:
def get_word_definition_and_pos(word, api_key):
    """
    Makes a request to the Merriam-Webster Collegiate Dictionary API for a given word and
    returns the first short definition and part of speech (POS) of the first entry.

    Parameters:
    word (str): The word for which the definition and POS are requested. It is
        percent-encoded before being placed in the URL path, so words containing
        spaces, slashes or other reserved characters are looked up verbatim.
    api_key (str): Your Merriam-Webster API key.

    Returns:
    tuple: A tuple ``(first_definition, pos)`` taken from the first entry of the
    response. If multiple definitions or POS tags are present, only the first is
    returned. ``(None, None)`` is returned when the response is empty, is not a list
    of entry objects (e.g. a list of spelling suggestions), or the first entry lacks
    the "shortdef" or "fl" field. Either element may individually be None when its
    field is present but empty.

    Raises:
    ValueError: If ``api_key`` is missing/empty, or if the response body is not JSON
        (for example a plain-text error message).
    requests.RequestException: On network failures, timeouts, or HTTP error statuses.
    """
    if not api_key:
        # Fail loudly instead of sending the literal string "None" as the key.
        raise ValueError("A Merriam-Webster API key is required (got an empty or missing value).")

    # Encode the word as a single path segment; safe="" also escapes "/".
    encoded_word = quote(str(word), safe="")
    url = f"https://dictionaryapi.com/api/v3/references/collegiate/json/{encoded_word}"

    # Let requests build the query string, and never wait indefinitely for a reply.
    response = requests.get(url, params={"key": api_key}, timeout=10)
    response.raise_for_status()

    try:
        response_data = response.json()
    except ValueError as exc:
        # JSON decoding errors are ValueError subclasses; re-raise with context.
        raise ValueError(
            f"Merriam-Webster returned a non-JSON response for {word!r}: {response.text[:200]!r}"
        ) from exc

    # An empty list or a non-list payload means there is nothing to extract.
    if not isinstance(response_data, list) or not response_data:
        return None, None

    first_entry = response_data[0]
    # Unknown words come back as a list of suggestion strings, not entry objects.
    if not isinstance(first_entry, dict) or "shortdef" not in first_entry or "fl" not in first_entry:
        return None, None

    short_definitions = first_entry["shortdef"]
    first_definition = short_definitions[0] if short_definitions else None
    pos = first_entry["fl"] or None
    return first_definition, pos

In [7]:
# Stack both word lists into one dataframe with a fresh 0..n-1 index.
balderdash_words_df = pd.concat(
    objs=[balderdash_words1_df, balderdash_words2_df],
    ignore_index=True,
)
balderdash_words_df

Unnamed: 0,words,def,POS
0,wadmiltilt,['A strong rough woolen cloth employed to cove...,['noun']
1,scrivello,"[""An elephant's tusk of less than 20 pounds in...",['noun']
2,scapulimancy,['Divination by means of a shoulder-blade: sam...,"['noun', 'noun']"
3,queez-madam,"['The cuisse-madam, a French jargonelle pear.']",['noun']
4,progger,['One who progs; a rambling or aimless searche...,"['noun', 'noun']"
...,...,...,...
330,Unremacadamized,Having not been repaved with macadam.,
331,Vomitory,An exit or outlet.,
332,Wabbit,"Exhausted, tired, worn out.",
333,Widdershins,In a contrary or counterclockwise direction.,


In [8]:
# Drop the POS column.
# errors="ignore" keeps this cell safe to re-run: the dataframe name is reassigned here,
# so on a second execution "POS" is already gone and a plain drop would raise KeyError.
balderdash_words_df = balderdash_words_df.drop(columns=["POS"], errors="ignore")
balderdash_words_df

Unnamed: 0,words,def
0,wadmiltilt,['A strong rough woolen cloth employed to cove...
1,scrivello,"[""An elephant's tusk of less than 20 pounds in..."
2,scapulimancy,['Divination by means of a shoulder-blade: sam...
3,queez-madam,"['The cuisse-madam, a French jargonelle pear.']"
4,progger,['One who progs; a rambling or aimless searche...
...,...,...
330,Unremacadamized,Having not been repaved with macadam.
331,Vomitory,An exit or outlet.
332,Wabbit,"Exhausted, tired, worn out."
333,Widdershins,In a contrary or counterclockwise direction.


In [9]:
# Rename "words" -> "word" and "def" -> "dictionary_definition", then blank out
# "dictionary_definition" and add an empty "pos" column; both are filled in by the lookup loop.
column_renames = {"words": "word", "def": "dictionary_definition"}
balderdash_words_df = balderdash_words_df.rename(columns=column_renames).assign(
    dictionary_definition=None,
    pos=None,
)
balderdash_words_df

Unnamed: 0,word,dictionary_definition,pos
0,wadmiltilt,,
1,scrivello,,
2,scapulimancy,,
3,queez-madam,,
4,progger,,
...,...,...,...
330,Unremacadamized,,
331,Vomitory,,
332,Wabbit,,
333,Widdershins,,


In [10]:
# For each word in the dataframe, get the definition and POS from the Merriam-Webster API
# and update the corresponding columns in the dataframe.

# Read the API key once, before the loop, and fail fast if it is missing.
dictionary_api_key = os.getenv("DICTIONARY_API_KEY")
if not dictionary_api_key:
    raise RuntimeError("DICTIONARY_API_KEY is not set; add it to the environment or to the .env file.")

progress_bar = tqdm(balderdash_words_df.iterrows(), total=balderdash_words_df.shape[0])

for index, row in progress_bar:
    word = row["word"]
    definition, pos = get_word_definition_and_pos(word, dictionary_api_key)
    if definition is not None:
        balderdash_words_df.at[index, "dictionary_definition"] = definition
        balderdash_words_df.at[index, "pos"] = pos
    else:
        # Show the most recent missing word in the progress bar's postfix.
        progress_bar.set_postfix({"word": word, "status": "not found"})
        # Write -1 in both columns when no definition is found
        # (this also discards a POS that came back without a definition).
        balderdash_words_df.at[index, "dictionary_definition"] = -1
        balderdash_words_df.at[index, "pos"] = -1

balderdash_words_df

  0%|          | 0/335 [00:00<?, ?it/s]

Unnamed: 0,word,dictionary_definition,pos
0,wadmiltilt,-1,-1
1,scrivello,-1,-1
2,scapulimancy,-1,-1
3,queez-madam,-1,-1
4,progger,-1,-1
...,...,...,...
330,Unremacadamized,-1,-1
331,Vomitory,an entrance piercing the banks of seats of a t...,noun
332,Wabbit,-1,-1
333,Widdershins,"in a left-handed, wrong, or contrary direction...",adverb


In [11]:
# Write the enriched dataframe to data/balderdash_words_with_definitions.csv
# under the current working directory, without the dataframe index.
balderdash_words_with_definitions_path = os.path.join(
    os.getcwd(),
    "data",
    "balderdash_words_with_definitions.csv",
)
balderdash_words_df.to_csv(path_or_buf=balderdash_words_with_definitions_path, index=False)