<a href="https://colab.research.google.com/github/FB-Decipherer/Experiment-17/blob/main/April_24_Word_Clusters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [304]:
# Mount Google Drive at /content/drive; every path used below lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [305]:
import os
import sys
from pathlib import Path
from google.colab import files

import operator
from datetime import date

import pandas as pd
import csv
import re

from IPython.display import HTML
import matplotlib.pyplot as plt

import nltk
nltk.download('stopwords')
from nltk import sent_tokenize

from wordcloud import WordCloud

import warnings
warnings.simplefilter('ignore', FutureWarning)

#!pip install weasyprint
import weasyprint

VERSION_DATE = '04-22-2024'


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


>[create_word_dictionaries](#scrollTo=aZnkrBe57pVi)

>[create_guide_word_dictionaries](#scrollTo=S0OqJetx8cyw)

>[create_master_guide_word_dictionary](#scrollTo=VrdK8wJH9Guz)

>[create_styled_html_pages](#scrollTo=hyCS89xC7PQk)

>[create_styled_html_sentence_pages](#scrollTo=3tLxy0cK5OkR)

>[create_word_clouds](#scrollTo=nY-bkOQX8tCr)



In [306]:
# Set Colab Content Root:

def get_content_root():
  """Return the Google Drive folder (with trailing slash) that the other path helpers build on."""
  content_root = '/content/drive/MyDrive/Word Cipher/Word Clusters/'
  return content_root


In [307]:
# Get Common folder Root:

def get_common_root():
  """Return the path of the 'Common/' folder directly under the content root."""
  return '{}Common/'.format(get_content_root())


In [308]:
def get_works_root():
  """Return the path of the 'Works/' folder directly under the content root."""
  return '{}Works/'.format(get_content_root())


In [309]:
# Not the Working Directory, but for one of the 16 'Works':

def get_work_dir(work_name):
  """Return the dated output folder for one Work.

  The path comes from get_today_path(); a message is printed when that path
  is not an existing directory. The path is returned either way.

  Args:
    work_name: Name of the Work.

  Returns:
    The folder path as a string, ending in '/'.
  """
  work_dir = get_today_path(work_name)

  if not os.path.isdir(work_dir):
    print('The file ' + work_dir + ' does not exist')

  return work_dir

#print(get_work_dir('Hamlet'))

In [310]:
# Get all Metadata for the Works:

def get_works_metatdata():
  """Return the path of 'All Works Metadata.csv' inside the Common folder."""
  return get_common_root() + "All Works Metadata.csv"

#print(get_works_metatdata())

In [311]:
# Get path to original unaltered source text for one of the Works:

def get_work_source_text(work_name):
  """Return the path of the source .txt file for one Work.

  The file is expected at '<Common>/Works Source/<work_name>.txt'. A message
  is printed when no file exists there; the path is returned either way.

  Args:
    work_name: Name of the Work (also the file's base name).

  Returns:
    The source-text path as a string.
  """
  source_path = get_common_root() + 'Works Source' + '/' + work_name + '.txt'

  if not os.path.isfile(source_path):
    print('The file ' + source_path + ' does not exist')

  return source_path

#print(get_work_source_text('Hamlet'))

In [312]:
# Each new day-indexed Version, create a new empty folder for each of the Works:

def get_today_path(work_name):
  """Return '<content root>/<creation day>/<work_name>/', creating the folder if it is missing.

  Args:
    work_name: Name of the Work.

  Returns:
    The folder path as a string, ending in '/'.
  """
  dated_work_dir = ''.join([get_content_root(), get_creation_day(), '/', work_name, '/'])

  if os.path.isdir(dated_work_dir):
    return dated_work_dir

  # exist_ok=True: a folder that appears between the check and this call is not an error.
  os.makedirs(dated_work_dir, exist_ok=True)
  return dated_work_dir

#get_today_path('Othello')

In [313]:
# Folder name for all the Works is of the form MM-DD-YYYY, e.g. 04-14-2024:

def get_creation_day():
  """Return the name of the dated output folder.

  The format string passed to strftime() contains no % directives, so the
  result is always the literal '04-22-2024', whatever today's date is.
  NOTE(review): this looks like a deliberate pin to one version folder;
  confirm before replacing it with a real date format.
  """
  pinned_day_format = "04-22-2024"
  return date.today().strftime(pinned_day_format)

#print(get_creation_day())

In [314]:
# Get the Path Names of the Source Texts as a list, read from a .csv file:

def get_work_names():
  """Return the names of the Works, taken from the last column of the metadata CSV.

  The first row of the file is treated as a header and skipped. Blank rows
  are ignored.

  Returns:
    A list with one string per data row.
  """
  metadata_path = get_works_metatdata()

  # newline='' is what the csv module recommends for files handed to csv.reader.
  with open(metadata_path, 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)

    # Skip the header; the default stops an empty file from raising StopIteration.
    next(reader, None)

    # Blank lines are yielded as empty lists, on which row[-1] would raise IndexError.
    return [row[-1] for row in reader if row]

#print(get_work_names())

In [315]:

def get_work_words_dict(work_name):
  """Load the saved word-count CSV of one Work, keyed by word.

  Reads '<content root>/<creation day>/<work_name>/<work_name>.csv' and
  prints the Work's name once the file has been opened.

  Args:
    work_name: Name of the Work.

  Returns:
    A dict mapping each value of the 'Word' column to its full CSV row
    (itself a dict of column name -> string).
  """
  word_count_file_path = get_content_root() + get_creation_day() + '/' + work_name + '/' + work_name + '.csv'

  with open(word_count_file_path, "r", newline="") as f:
    print(work_name)

    # The file must not be read before it is handed to DictReader: a prior
    # f.read() leaves the handle at end-of-file and the reader yields no rows.
    reader = csv.DictReader(f)
    return {row["Word"]: row for row in reader}

#print(get_work_words_dict('Othello'))

In [316]:
# Create new Works directories for each Work for Today if not existing:

# Root folder for this version: '<content root>/<creation day>/'.
today_file_path = get_content_root() + get_creation_day() + '/'

# exist_ok=True makes re-running this cell harmless when the folder is already there.
os.makedirs(today_file_path, exist_ok = True)

work_name_list = get_work_names()

# One sub-folder per Work named in the metadata CSV.
for work_name in work_name_list:

  work_dir = today_file_path + work_name + '/'
  os.makedirs(work_dir, exist_ok = True)


In [317]:
# Make Source Text All Lower case:

def remove_uppercase(text):
  """Return `text` converted to lower case."""
  lowered_text = text.lower()
  return lowered_text


In [318]:
# Remove Source Text Punctuation:

def remove_punctuation(text):
  """Return `text` with every character removed that is neither a regex word character nor whitespace.

  Letters, digits and underscores count as word characters, so they are
  kept; whitespace, including newlines, is kept as well.

  Args:
    text: The string to clean.

  Returns:
    The cleaned string.
  """
  return re.sub(r'[^\w\s]', '', text)


In [319]:
# Limit Size of Dictionary Values:

def limit_dict_values(dict1, value):
  """Return a copy of `dict1` holding only the items whose value is at least `value`.

  Args:
    dict1: The dictionary to filter; it is not modified.
    value: The lowest value an item may have and still be kept.

  Returns:
    A new dictionary, in the original key order, without the smaller items.
  """
  return {key: val for key, val in dict1.items() if val >= value}


In [320]:
# Create a Dictionary of the Word Occurrences in One Source Text:

def create_word_count_dict(filepath, word_occurrance_min, word_length_min):
  """Count the words of one text file and keep the frequent ones.

  The text is stripped of punctuation, lower-cased and split on whitespace.

  Args:
    filepath: The path to the text file.
    word_occurrance_min: A word is kept only if its count is at least this.
    word_length_min: Only words strictly longer than this are counted.

  Returns:
    A dict of word -> count, in order of first appearance. Words that are
    too short keep a count of 0, so they appear in the result only when
    word_occurrance_min is 0 or less.
  """
  # The context manager closes the file even if reading raises.
  with open(filepath, "r") as f:
    text = f.read()

  text = remove_uppercase(remove_punctuation(text))

  dict_word_counts = {}
  for word in text.split():
    # Every word gets an entry; only sufficiently long ones are incremented.
    dict_word_counts.setdefault(word, 0)
    if len(word) > word_length_min:
      dict_word_counts[word] += 1

  return limit_dict_values(dict_word_counts, word_occurrance_min)

#print(create_word_count_dict("/content/drive/MyDrive/Word Cipher/Word Clusters/Common/Works Source/A Midsummer Night's Dream.txt", 5, 5))

In [321]:
# Display a dataframe for a dict in a csv file:

def display_csv_dict_in_dataframe(csv_dict_path):
  """Load a CSV file with a header row into a pandas DataFrame.

  Args:
    csv_dict_path: Path of the CSV file.

  Returns:
    A DataFrame with one row per CSV data row; every cell is a string, as
    produced by csv.DictReader.
  """
  with open(csv_dict_path, 'r') as csv_file:
    csv_rows = [row for row in csv.DictReader(csv_file)]
    return pd.DataFrame(csv_rows)

#display_csv_dict_in_dataframe("/content/drive/MyDrive/Word Cipher/Word Clusters/Common/All Works Metadata.csv")

In [322]:
# Display a dict in dataframe:

def display_word_dict_in_dataframe(word_dict):
  """Build a pandas DataFrame from word data held in a CSV file or in memory.

  Args:
    word_dict: One of
      * a path (str, bytes or os.PathLike) of a CSV file with a header row,
        which is read with csv.DictReader;
      * a dict, which becomes one DataFrame row per key (orient="index");
      * any other value accepted by the pandas.DataFrame constructor, such
        as a list of row dicts.

  Returns:
    The resulting DataFrame.
  """
  if isinstance(word_dict, (str, bytes, os.PathLike)):
    with open(word_dict, 'r') as f:
      return pd.DataFrame(list(csv.DictReader(f)))

  if isinstance(word_dict, dict):
    # orient="index" also handles plain word -> count dicts, whose scalar
    # values the bare DataFrame constructor would reject.
    return pd.DataFrame.from_dict(word_dict, orient="index")

  return pd.DataFrame(word_dict)

#csv_dict_path = "/content/drive/MyDrive/Word Cipher/Word Clusters/04-14-2024/All's Well That Ends Well/All's Well That Ends Well guide words.csv"
#with open(csv_dict_path, 'r') as f:
##  reader = csv.DictReader(f)
#  dict_data = list(reader)
  #print(display_word_dict_in_dataframe(dict_data))
#  display(display_word_dict_in_dataframe(dict_data))

In [323]:
# Sort a Dictionary by Value, Descending:

def sort_descending(dict1):
  """Return a new dictionary with the items of `dict1` ordered by value, largest first.

  Args:
    dict1: The dictionary to sort; it is not modified.

  Returns:
    A new dictionary whose iteration order is descending by value. Items
    with equal values keep their original relative order.
  """
  ranked_items = sorted(dict1.items(), key=operator.itemgetter(1), reverse=True)
  return dict(ranked_items)


In [324]:

def save_word_occurrences(work_name, word_occurrences_dict):
  """Write the word counts of one Work to '<work dir>/<work_name>.csv'.

  The rows are written in descending order of count, under the header
  'Word', 'occurrence'.

  Args:
    work_name: Name of the Work; selects the folder and the file name.
    word_occurrences_dict: Dict of word -> count.
  """
  ranked_occurrences = sort_descending(word_occurrences_dict)
  word_count_file_path = get_work_dir(work_name) + work_name + '.csv'

  # The with-block closes the file on exit, so no explicit close() is needed.
  with open(word_count_file_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Word', 'occurrence'])
    writer.writerows(ranked_occurrences.items())


# create_word_dictionaries

In [325]:
# Create and Save a Filtered Dictionary of Word Occurrences for Each Source Text:

def create_word_dictionaries():
  """Build and save the filtered word-count CSV for every Work.

  A word is counted only when it is longer than 5 characters and is kept
  only when it then occurs at least 5 times.
  """
  word_count_min = 5
  word_length_min = 5

  for work_name in get_work_names():

    dict_word_counts = create_word_count_dict(
        get_work_source_text(work_name), word_count_min, word_length_min)

    # Stop-word filtering is currently switched off:
    # dict_word_counts = remove_external_stop_words(dict_word_counts)
    # dict_word_counts = remove_internal_stop_words(dict_word_counts)
    # dict_word_counts = remove_work_specfic_stop_words(work_name, dict_word_counts)

    save_word_occurrences(work_name, dict_word_counts)

#create_word_dictionaries()

# create_guide_word_dictionaries

In [326]:
# Create and Save a Dictionary of Guide Word Occurrences for Each Source Text:

def create_guide_word_dictionaries():
  """Count the guide words of every Work and save each result as a CSV."""
  for work_name in get_work_names():
    save_guide_word_occurrences(work_name, get_work_guide_words(work_name))

#create_guide_word_dictionaries()


In [327]:
# Get List of Guide Words:

# The guide words, in the same order as their stems in guide_word_substring_list.
guide_word_list = [
    "fortune",
    "nature",
    "honour",
    "reputation",
]

def get_guide_word_list():
  """Return the module-level list of guide words (the list itself, not a copy)."""
  return guide_word_list


In [328]:

def get_work_guide_words(work_name):
  """Count how often each guide-word stem occurs in the source text of one Work.

  The text is lower-cased and searched for each stem from
  get_guide_word_substring_list() as a plain substring, so inflected or
  derived forms containing the stem are counted too.

  Args:
    work_name: Name of the Work.

  Returns:
    A dict mapping the full guide word (via get_full_str) to the number of
    non-overlapping occurrences of its stem.
  """
  source_file_path = get_work_source_text(work_name)

  with open(source_file_path, "r") as f:
    file_contents = f.read().lower()

  work_guide_words = {}
  for stem in get_guide_word_substring_list():
    # str.count treats the stem literally; it is never interpreted as a regex.
    work_guide_words[get_full_str(stem)] = file_contents.count(stem)

  return work_guide_words

#print(get_work_guide_words('Hamlet'))

In [329]:
##save_guide_word_occurrences

def save_guide_word_occurrences(work_name, word_occurrences_dict):
  """Write the guide-word counts of one Work to '<work dir>/<work_name> guide words.csv'.

  Rows are written in descending order of count. The header is
  'Count', 'Occurrences': the first column holds the guide word and the
  second its count. get_guide_word_dict() looks rows up by the 'Count'
  header, so the header text must stay as it is.

  Args:
    work_name: Name of the Work; selects the folder and the file name.
    word_occurrences_dict: Dict of guide word -> count.
  """
  ranked_occurrences = sort_descending(word_occurrences_dict)

  guide_word_count_file_path = get_work_dir(work_name) + work_name + ' guide words' + '.csv'

  with open(guide_word_count_file_path, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["Count", "Occurrences"])
    # Each dict item is a (guide word, count) pair, i.e. exactly one CSV row.
    writer.writerows(ranked_occurrences.items())


# create_master_guide_word_dictionary

In [330]:
# Get List of Substring Guide Words:

# The search stems, in the same order as the full words in guide_word_list.
guide_word_substring_list = [
    "fortun",
    "natur",
    "hono",
    "reput",
]

def get_guide_word_substring_list():
  """Return the module-level list of guide-word stems (the list itself, not a copy)."""
  return guide_word_substring_list


def get_guide_word_substring(whole_word):
  """Return the search stem that belongs to a full guide word.

  guide_word_list and guide_word_substring_list are paired by position.

  Args:
    whole_word: A word from guide_word_list.

  Returns:
    The stem at the same position in guide_word_substring_list.

  Raises:
    KeyError: If `whole_word` is not a guide word.
  """
  # A local name other than `dict` keeps the builtin usable inside the function.
  stem_by_word = dict(zip(guide_word_list, guide_word_substring_list))
  return stem_by_word[whole_word]

In [331]:
# Remove Dictionary Items:

def remove_dict_entries(dict1, list1):
  """Return a copy of `dict1` without the keys listed in `list1`.

  Args:
    dict1: The dictionary to filter; it is not modified.
    list1: The keys to leave out.

  Returns:
    A new dictionary holding the remaining items in their original order.
  """
  return {key: value for key, value in dict1.items() if key not in list1}


In [332]:
# Remove External Stop Words:

def remove_external_stop_words(ext_stop_words_dict):
  """Return a copy of a word-count dict without NLTK's English stop words.

  Args:
    ext_stop_words_dict: Dict keyed by word (for example word -> count).
      Despite its name this is the dict to be filtered, not a dict of
      stop words. It is not modified.

  Returns:
    A new dict holding only the items whose key is not an NLTK English
    stop word.
  """
  # A set gives constant-time membership tests over the stop-word list.
  ext_stop_words = set(nltk.corpus.stopwords.words('english'))

  return {word: count for word, count in ext_stop_words_dict.items()
          if word not in ext_stop_words}

In [333]:
# Remove Internal Stop Words:

def remove_internal_stop_words(source_text):
  """Return a copy of a word-count dict without the notebook's own stop words.

  Args:
    source_text: Dict keyed by word (for example word -> count). The
      parameter name is historical; the value is a dict, not raw text.
      It is not modified.

  Returns:
    A new dict holding only the items whose key is not in the built-in
    stop-word list.
  """
  my_stop_words = ["things", "within" , "should", "before","though","againe","live", "bring", "finde", "heare", "see", "leaue" , "ere", "put", "giue", "shee", "hee","haue", "heere", "come", "make", "let","take", "give", "heere", "par", "shall", "lord"]

  # Filter the argument itself and hand the result back to the caller.
  return {word: count for word, count in source_text.items()
          if word not in my_stop_words}
In [334]:
# Remove Work-specific Stop Words:

def remove_work_specfic_stop_words(work_name, dict_word_counts):
  """Remove from a word-count dict every word listed in the Work's CSV file.

  The words come from the first column of '<work dir>/<work_name>.csv',
  with its first row skipped as a header. When that file does not exist
  the input dict is returned unchanged.
  NOTE(review): save_word_occurrences() writes its word counts to this
  same path - confirm that this is the intended stop-word source.

  Args:
    work_name: Name of the Work.
    dict_word_counts: Dict keyed by word; it is not modified.

  Returns:
    The filtered dict, or `dict_word_counts` itself when no CSV exists.
  """
  work_csv_path = get_work_dir(work_name) + work_name + ".csv"

  if not os.path.isfile(work_csv_path):
    return dict_word_counts

  with open(work_csv_path, "r") as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    words_to_drop = [row[0] for row in reader]

  return remove_dict_entries(dict_word_counts, words_to_drop)

In [335]:
#888
def get_guide_word_count_for_work(work_name, guide_word):
  """Look up one guide word's count in the Work's saved guide-word CSV.

  Reads '<work dir>/<work_name> guide words.csv', skips its header row and
  maps the first column of every row to the second.

  Args:
    work_name: Name of the Work.
    guide_word: The guide word to look up.

  Returns:
    The count as the string stored in the CSV, or None when the guide word
    has no row.
  """
  word_count_file_path = get_work_dir(work_name) + work_name + ' guide words' + '.csv'

  with open(word_count_file_path, 'r') as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    count_by_word = {row[0]: row[1] for row in reader}

  return count_by_word.get(guide_word)


In [336]:
# Get HTML for Color Coding Words:

def get_guide_word_html(guide_word, color):
  """Return `guide_word` wrapped in a bold, undecorated <span> of the given CSS color."""
  span_style = {
      "color": color,
      "text-decoration": "none",
      "font-weight": "bold",
  }
  return generate_html_for_inline_styles_of_text(guide_word, span_style)

In [337]:
# Get HTML for Styling Words:

def generate_html_for_inline_styles_of_text(text, style):
  """Wrap `text` in a <span> that carries the given CSS as an inline style.

  Args:
    text: The text to be styled; it is inserted as-is, without HTML escaping.
    style: A dictionary of CSS property -> value.

  Returns:
    The <span> element as a string, its declarations joined by '; ' in the
    dictionary's iteration order.
  """
  css_declarations = "; ".join(f"{prop}: {val}" for prop, val in style.items())
  return f"<span style='{css_declarations}'>{text}</span>"


In [338]:
def list_to_html_table(list_of_strings):
  """Convert a list of strings into a one-column HTML table.

  Args:
    list_of_strings: The cell contents, one table row each. They are
      inserted as-is, so they may already contain HTML markup.

  Returns:
    An HTML table string with a border of 1.
  """
  # A single join avoids rebuilding the growing string for every row.
  table_rows = "".join(
      """<tr><td>{}</td></tr>""".format(string) for string in list_of_strings)

  return """<table border=1 >""" + table_rows + """</table>"""


In [339]:
def replace_substring(list_of_str, substring, new_string):
  """Return a copy of a list of strings with a substring replaced in every item.

  Args:
    list_of_str: A list of str; it is not modified.
    substring: The substring to be replaced.
    new_string: The text that takes the place of every occurrence.

  Returns:
    A new list of str, of the same length and order, with the replacements made.
  """
  return [item.replace(substring, new_string) for item in list_of_str]


In [340]:
def get_guide_sentences(source_file_path, which_guide_word):
  """Return the sentences of a text file that contain a given guide word.

  The file is split on '.', so a "sentence" is the text between two full
  stops, with its surrounding whitespace kept. The match is a
  case-sensitive substring test.

  Args:
    source_file_path: Path of the text file.
    which_guide_word: The word (or stem) to look for.

  Returns:
    A list of the matching sentences, in file order.
  """
  with open(source_file_path, "r") as f:
    sentences = f.read().split(".")

  # Filter on the requested word rather than on a fixed "fortune".
  return [sentence for sentence in sentences if which_guide_word in sentence]


In [341]:

def get_guide_word_color(guide_word):
  """Return the CSS color name used to highlight a guide word.

  Raises:
    KeyError: If `guide_word` has no color assigned.
  """
  guide_word_colors = dict(
      fortune="red",
      nature="green",
      honour="blue",
      reputation="orange",
  )
  return guide_word_colors[guide_word]


In [342]:
def dict_to_html_table(dict):
  """Convert a dictionary to a two-column HTML table, one row per item.

  Args:
    dict: A dictionary; keys fill the first column, values the second.

  Returns:
    An HTML table string.
  """
  row_markup = ["<tr><td>{}</td><td>{}</td></tr>".format(key, value)
                for key, value in dict.items()]
  return "<table>" + "".join(row_markup) + "</table>"



In [343]:
def get_full_str(sub_str):
    """Return the full guide word that belongs to a search stem.

    Args:
        sub_str: One of the stems "fortun", "natur", "hono" or "reput".

    Returns:
        The matching full guide word, or the string "Invalid string" for
        any other input.
    """
    full_word_by_stem = {
        "fortun": "fortune",
        "natur": "nature",
        "hono": "honour",
        "reput": "reputation",
    }
    return full_word_by_stem.get(sub_str, "Invalid string")

In [344]:
# Create and Save a Dictionary of Guide Word Occurrences in the Common dir:

def create_master_guide_word_dictionary():
  """Save one table with the guide-word counts of all Works.

  The table has a 'Work' column followed by 'Fortune', 'Nature', 'Honour'
  and 'Reputation', with one row per Work. It is written without an index
  to 'Master Guide Word Occurrances.csv' in the Common folder.
  """
  work_name_list = get_work_names()
  counts_per_work = [get_work_guide_words(work_name) for work_name in work_name_list]

  df = pd.DataFrame({
      'Work': work_name_list,
      'Fortune': [counts['fortune'] for counts in counts_per_work],
      'Nature': [counts['nature'] for counts in counts_per_work],
      'Honour': [counts['honour'] for counts in counts_per_work],
      'Reputation': [counts['reputation'] for counts in counts_per_work],
  })

  df.to_csv(get_common_root() + "Master Guide Word Occurrances.csv", index=False)

#print(create_master_guide_word_dictionary())

# create_styled_html_pages

In [345]:
# For each Work, color-code the Guide Words and save as an HTML file:

def create_styled_html_pages():
  """Write, for every Work, an HTML page of its full text with the guide words color-coded.

  The source text is lower-cased, every occurrence of each guide word is
  wrapped in a colored <span>, a "To Do" note is put in front, and the
  result is saved as 'Guide Words Page Text.html' in the Work's dated
  folder. The text is written as-is: it is not HTML-escaped.
  """
  # The span markup depends only on the guide word, so build it once
  # instead of once per Work.
  styled_guide_words = [
      (guide_word, get_guide_word_html(guide_word, get_guide_word_color(guide_word)))
      for guide_word in get_guide_word_list()
  ]

  message = "<b><br><br>To Do:</b> <br><br> save pages as PDF, then subsample down to a PNG with size suitable for Machine Vision, then use existing Clustering 'maths' to determing maximum guideword clusteirng<br><br>"

  for work_name in get_work_names():

    source_file_path = get_work_source_text(work_name)

    with open(source_file_path, "r") as f:
      file_contents = remove_uppercase(f.read())

    # Splice a colored span around every occurrence of each guide word.
    for guide_word, guide_word_html in styled_guide_words:
      file_contents = file_contents.replace(guide_word, guide_word_html)

    words_output_file_path = get_today_path(work_name) + 'Guide Words Page Text' + '.html'

    with open(words_output_file_path, "w") as output_html_file:
      output_html_file.write(message + file_contents)

    # end 'for each guide word'
  # end for Words, each Work

In [346]:
def appy_styling_to_html(totals_html_table):
  """Wrap every guide word found in an HTML string in its colored <span>.

  Args:
    totals_html_table: The HTML text to style.

  Returns:
    The HTML text with each occurrence of each guide word replaced by its
    styled span.
  """
  for guide_word in get_guide_word_list():
    guide_word_html = get_guide_word_html(guide_word, get_guide_word_color(guide_word))

    # Plain string replacement, as in create_styled_html_pages: neither the
    # guide word nor the replacement markup is interpreted as regex syntax.
    totals_html_table = totals_html_table.replace(guide_word, guide_word_html)

  return totals_html_table

# create_styled_html_sentence_pages

In [347]:
# For each Work, split text into sentences and
# color code the Guide Words, and save as a html file:

def _style_guide_sentences(text, guide_words):
  """Return the '.'-separated sentences of `text` that contain a guide word, with those words color-coded."""
  styled_sentences = []

  for sentence in text.split("."):
    word_list = sentence.split()
    has_guide_word = False

    for i, word in enumerate(word_list):
      # Only whole whitespace-separated tokens match, so a guide word with
      # punctuation attached is not highlighted.
      if word in guide_words:
        word_list[i] = get_guide_word_html(word, get_guide_word_color(word))
        has_guide_word = True

    # Append once per sentence, after all of its guide words are styled.
    if has_guide_word:
      styled_sentences.append(" ".join(word_list))

  return styled_sentences


def create_styled_html_sentence_pages():
  """Write, for every Work, an HTML page listing its sentences that contain guide words.

  Each page is built from 'guide_sentences_template.html' in the Common
  folder: 'Page_Title' and 'Page_Heading' are replaced by the Work's name
  and 'Guide_Sentences' by the total guide-word count. A color-coded table
  of the per-word counts and a table of the color-coded sentences are
  appended. The page is saved as '<work_name> Guide Words Sentences.html'
  in the Work's dated folder.

  The counts are read from the Work's guide-word CSV, so
  create_guide_word_dictionaries() has to run first.
  """
  guide_words = get_guide_word_list()

  for work_name in get_work_names():

    source_file_path = get_work_source_text(work_name)

    with open(source_file_path, "r") as f:
      file_contents = remove_uppercase(f.read())

    # Built afresh for every Work, so a page never carries sentences from
    # the Works processed before it.
    styled_guide_sentences = _style_guide_sentences(file_contents, guide_words)
    page_html_guide_sentences = list_to_html_table(styled_guide_sentences)

    # For the header, open the template file in the Common dir for substitutions:
    page_html_path = get_common_root() + 'guide_sentences_template.html'

    with open(page_html_path, "r") as f:
      html_template_file_contents = f.read()

    html_template_file_contents = html_template_file_contents.replace("Page_Title", work_name)
    html_template_file_contents = html_template_file_contents.replace("Page_Heading", work_name)

    # A guide word without a row in the CSV counts as 0 instead of failing in int(None).
    word_occurrance_dict = {}
    for guide_word in guide_words:
      stored_count = get_guide_word_count_for_work(work_name, guide_word)
      word_occurrance_dict[guide_word] = int(stored_count) if stored_count is not None else 0

    work_total_guide_words = sum(word_occurrance_dict.values())

    # Color-coded HTML table of guide word occurrences for the header:
    totals_html_table = dict_to_html_table(word_occurrance_dict)
    styled_totals_html_table = appy_styling_to_html(totals_html_table)
    html_template_file_contents += styled_totals_html_table + '<br/>'

    # NOTE(review): the number shown is the total of guide-word occurrences,
    # while the label speaks of sentences - confirm which one is intended.
    html_template_file_contents = html_template_file_contents.replace("Guide_Sentences",
          '<b>' + str(work_total_guide_words) + '</b>' + ' Sentences with Guide Words' + '<br/>')

    page_html = html_template_file_contents + page_html_guide_sentences

    html_output_file_path = get_work_dir(work_name) + work_name + ' Guide Words Sentences' + '.html'

    with open(html_output_file_path, "w") as output_html_file:
      output_html_file.write(page_html)



In [348]:
def read_text_file(file_path):
  """Read a text file and return its lower-cased, whitespace-separated words as a list."""
  with open(file_path, "r") as text_file:
    lowered_text = text_file.read().lower()
  return lowered_text.split()


In [349]:
def filter_words(words, substring):
  """Return, in their original order, the words that contain `substring`."""
  return [word for word in words if substring in word]


In [350]:
def read_text_file(file_path):
  """Read a text file and return its lower-cased, whitespace-separated words as a list."""
  # NOTE: this is a second definition of read_text_file; an identical one
  # appears earlier in the notebook and is replaced by this one.
  with open(file_path, "r") as text_file:
    file_text = text_file.read()
  return file_text.lower().split()

In [351]:
def get_guide_word_dict(work_name):
  """Load the Work's guide-word CSV, keyed by the value of its 'Count' column.

  Reads '<work dir>/<work_name> guide words.csv'.

  Args:
    work_name: Name of the Work.

  Returns:
    A dict mapping each 'Count' value to its full CSV row (a dict of
    column name -> string).
  """
  dict_file_path = get_work_dir(work_name) + work_name + ' guide words' + '.csv'

  with open(dict_file_path, 'r') as f:
    return {row['Count']: row for row in csv.DictReader(f)}

In [352]:
def display_dataframe(guide_word, guide_word_counts, display_row_count):
  """Display the counts of one guide word as a table with a 'Total' row.

  Args:
    guide_word: The guide word; used in the column title.
    guide_word_counts: Dict of row label -> count.
    display_row_count: Maximum number of rows to display; the 'Total' row
      is the last row and is cut off when the table is longer than this.

  Returns:
    None. The table is shown through the notebook's display().
  """
  df = pd.DataFrame.from_dict(guide_word_counts, orient="index")

  column_name = "Occurrances of '" + guide_word + "'"
  # set_axis() returns a new frame; its `inplace` argument no longer exists in pandas 2.x.
  df = df.set_axis([column_name], axis=1)

  df.loc['Total'] = df.sum(numeric_only=True)

  display(df.head(display_row_count))

  print("\n")


In [353]:
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import csv

def create_word_cloud(work_name):
  """Render the saved word counts of one Work as a word-cloud image.

  Reads the 'Word' and 'occurrence' columns of '<work dir>/<work_name>.csv'
  and saves the image as '<work dir>/<work_name>.png'.

  Args:
    work_name: Name of the Work.
  """
  word_count_file_path = get_work_dir(work_name) + work_name + '.csv'

  with open(word_count_file_path, 'r') as f:
    word_occurrences = {row['Word']: int(row['occurrence'])
                        for row in csv.DictReader(f)}

  cloud = WordCloud()
  cloud.generate_from_frequencies(word_occurrences)

  word_count_save_path = get_work_dir(work_name) + work_name + '.png'
  cloud.to_file(word_count_save_path)

#print(create_word_cloud('Othello'))

# create_word_clouds

In [354]:
def create_word_clouds():
  """Create the word-cloud image for every Work listed in the metadata CSV."""
  for name in get_work_names():
    create_word_cloud(name)


In [372]:
def create_styled_pdf_pages():
  """Convert every Work's 'Guide Words Page Text.html' into a PDF next to it.

  The HTML pages are the ones written by create_styled_html_pages(), so
  that step has to run first.
  """
  for work_name in get_work_names():

    page_base_path = get_work_dir(work_name) + "Guide Words Page Text"

    # The with-block closes the HTML file once it has been read.
    with open(page_base_path + '.html', 'r', encoding='utf-8') as html_file:
      source_code = html_file.read()

    # Name weasyprint's HTML explicitly: the bare name HTML in this notebook
    # is IPython.display.HTML, which can neither take `string=` nor write a PDF.
    pdf = weasyprint.HTML(string=source_code).write_pdf()

    with open(page_base_path + '.pdf', 'wb') as f:
      f.write(pdf)

#print(create_styled_pdf_pages())

# create_styled_pdf_pages

In [373]:
def word_clusters():
  """Run the enabled stages of the pipeline, one after another."""
  pipeline_steps = (
      create_word_dictionaries,
      create_guide_word_dictionaries,
      create_master_guide_word_dictionary,
      # create_word_clouds,           (currently disabled)
      create_styled_html_pages,
      create_styled_html_sentence_pages,
      # create_styled_pdf_pages,      (currently disabled)
  )

  for run_step in pipeline_steps:
    run_step()


>[create_word_dictionaries](#scrollTo=aZnkrBe57pVi)

>[create_guide_word_dictionaries](#scrollTo=S0OqJetx8cyw)

>[create_master_guide_word_dictionary](#scrollTo=VrdK8wJH9Guz)

>[create_styled_html_pages](#scrollTo=hyCS89xC7PQk)

>[create_styled_html_sentence_pages](#scrollTo=3tLxy0cK5OkR)

>[create_word_clouds](#scrollTo=nY-bkOQX8tCr)

>[create_styled_pdf_pages](#scrollTo=I_Yq2uEPAXHN)



In [357]:
# Entry point: run the pipeline stages enabled in word_clusters().
word_clusters()