# Counting the number of unique words in Shakespeare's Venus and Adonis

In [30]:
from collections import Counter
import re
import csv

## Count Unique Words

In [3]:
# Function to read text from a file
def read_text_from_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

# Path to the text file
file_path = 'text.txt'

# Read the text from the file
text = read_text_from_file(file_path)

In [9]:

# Remove punctuation and convert to lower case
cleaned_text = re.sub(r'[^\w\s]', '', text).lower()

# Split the text into words
words = cleaned_text.split()

# Count the occurrences of each word
word_counts = Counter(words)

# Number of unique words
num_unique_words = len(word_counts)

# Sort the word counts by highest count
sorted_word_counts = sorted(word_counts.items(), key=lambda item: item[1], reverse=True)

csv_file_path = 'outputfile.csv'

# Write the results to a CSV file
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
    # Write the header
    csvwriter.writerow(['Word', 'Count'])
    # Write the word counts
    for word, count in sorted_word_counts:
        csvwriter.writerow([word, count])

# Display a message indicating that the results have been saved
print(f"Results have been saved to {csv_file_path}")

Results have been saved to outputfile.csv


## Convert CSV to HTML

In [1]:
import pandas as pd

In [3]:
# Read the CSV file
df = pd.read_csv('colors.csv')

# Convert the DataFrame to an HTML table
html_table = df.to_html()

# Save the HTML table to a file
with open('output.html', 'w') as f:
    f.write(html_table)

NameError: name 'pd' is not defined

## Group Lines/Stanzas

In [32]:
# Sample stanza as input
poem = read_text_from_file(file_path)



print("Raw poem text:")

def remove_punctuation(text):
    return re.sub(r'[^\w\s]', '', text)

# Sample stanza as input
poem = read_text_from_file(file_path)

# Split the poem into stanzas
stanzas = poem.strip().split("\n \n")

# Create a dictionary with stanza numbers as keys
stanza_dict = {}

# Populate the dictionary
for i, stanza in enumerate(stanzas, 1):
    # Split the stanza into lines and remove punctuation from each line
    lines = [remove_punctuation(line.lower()) for line in stanza.split("\n")]
    stanza_dict[f'Stanza {i}'] = lines

print(stanza_dict)





Raw poem text:
{'Stanza 1': ['even as the sun with purplecolourd face', 'had taen his last leave of the weeping morn', 'rosecheekd adonis tried him to the chase', 'hunting he lovd but love he laughd to scorn ', '   sickthoughted venus makes amain unto him', '   and like a boldfacd suitor gins to woo him'], 'Stanza 2': ['thrice fairer than myself thus she began', 'the fields chief flower sweet above compare   ', 'stain to all nymphs more lovely than a man', 'more white and red than doves or roses are', '   nature that made thee with herself at strife', '   saith that the world hath ending with thy life'], 'Stanza 3': ['vouchsafe thou wonder to alight thy steed', 'and rein his proud head to the saddlebow', 'if thou wilt deign this favour for thy meed', 'a thousand honey secrets shalt thou know    ', 'here come and sit where never serpent hisses', 'and being set ill smother thee with kisses'], 'Stanza 4': ['and yet not cloy thy lips with loathd satiety', 'but rather famish them amid their

## Count Red and White words

In [33]:
# Lists of words associated with red and white
red_words = ['red', 'rose', 'roses', 'fire', 'fiery', 'crimson', 'purple', 'purplecolourd', 
             'blood', 'bloody', 'rose-cheekd', 'blushd', 'redrose', 'ruby-colourd', 'waxred',
             'plum', 'coral', 'blush', 'blushing', 'ripered']
white_words = ['white', 'moonlight', 'doves', 'frost' , 'pale', 'silver', 
               'lily', 'lawn', 'ivory', 'ashypale', 'alabaster', 'whiter', 'frozen', 
               'milk', 'palefacd', 'coldpale', 'frothy', 'frosty', 'whiteness']
# red_words = ['fire', 'coal', 'dry', 'hot', 'warm', 'coals', 'melt', 'burn']
# white_words = ['water', 'moisture', 'rain', 'tears', 'dian', 'spring', 'wet', 'cold', 'cool', 'dark' 'moist', 'earth']
# Initialize counters
stanza_count = 0
verse_count = 0

# Dictionaries to store indices
stanza_indices = {}
verse_indices = {}

In [34]:
# Iterate over the stanzas
for stanza_index, verses in stanza_dict.items():
    stanza_contains_red = any(word in red_words for verse in verses for word in verse.split())
    stanza_contains_white = any(word in white_words for verse in verses for word in verse.split())
    
    if stanza_contains_red and stanza_contains_white:
        stanza_count += 1
        stanza_indices[stanza_index] = []
        
        # Iterate over the verses within the stanza
        for verse_index, verse in enumerate(verses):
            contains_red = any(word in red_words for word in verse.split())
            contains_white = any(word in white_words for word in verse.split())
            
            if contains_red and contains_white:
                verse_count += 1
                if stanza_index not in verse_indices:
                    verse_indices[stanza_index] = []
                verse_indices[stanza_index].append(verse_index)
                print(stanza_dict[stanza_index])

# Output results
print("Stanza indices where both red and white words appear together:")
print(stanza_indices)
print(f"Total number of stanzas with both red and white words: {stanza_count}")

print("Verse indices within stanzas where both red and white words appear together:")
print(verse_indices)
print(f"Total number of verses with both red and white words: {verse_count}")


['thrice fairer than myself thus she began', 'the fields chief flower sweet above compare   ', 'stain to all nymphs more lovely than a man', 'more white and red than doves or roses are', '   nature that made thee with herself at strife', '   saith that the world hath ending with thy life']
['and yet not cloy thy lips with loathd satiety', 'but rather famish them amid their plenty    ', 'making them red and pale with fresh variety', 'ten kisses short as one one long as twenty', '   a summers day will seem an hour but short', '   being wasted in such timebeguiling sport']
['over one arm the lusty coursers rein', 'under her other was the tender boy', 'who blushd and pouted in a dull disdain', 'with leaden appetite unapt to toy', '   she red and hot as coals of glowing fire', '   he red for shame but frosty in desire']
[' ', 'still she entreats and prettily entreats', 'for to a pretty ear she tunes her tale', 'still is he sullen still he lours and frets', 'twixt crimson shame and anger ash

## Counting Red and White Separately

In [37]:
red_count = 0
white_count = 0

red_indices = []
white_indices = []

word_pattern = re.compile(r"\b[\w'-]+\b")

# Iterate over the stanzas
for stanza_index, verses in stanza_dict.items():
    stanza_red = any(
        word in red_words 
        for verse in verses 
        for word in word_pattern.findall(verse.lower())
    )
    stanza_white = any(
        word in white_words 
        for verse in verses 
        for word in word_pattern.findall(verse.lower())
    )


    if stanza_red and not stanza_white:
        red_count += 1
        red_indices.append(stanza_index)
    if stanza_white and not stanza_red:
        white_count += 1
        white_indices.append(stanza_index)

print(stanza_dict['Stanza 1'])
print(f'Red count {red_count}')
print('Red Indices')
print(red_indices)
print(f'White count: {white_count}')
print('White Indices')
print(white_indices)

['even as the sun with purplecolourd face', 'had taen his last leave of the weeping morn', 'rosecheekd adonis tried him to the chase', 'hunting he lovd but love he laughd to scorn ', '   sickthoughted venus makes amain unto him', '   and like a boldfacd suitor gins to woo him']
Red count 30
Red Indices
['Stanza 1', 'Stanza 16', 'Stanza 18', 'Stanza 19', 'Stanza 20', 'Stanza 25', 'Stanza 33', 'Stanza 37', 'Stanza 46', 'Stanza 56', 'Stanza 65', 'Stanza 76', 'Stanza 83', 'Stanza 85', 'Stanza 86', 'Stanza 88', 'Stanza 91', 'Stanza 93', 'Stanza 96', 'Stanza 109', 'Stanza 111', 'Stanza 156', 'Stanza 167', 'Stanza 173', 'Stanza 179', 'Stanza 184', 'Stanza 187', 'Stanza 194', 'Stanza 197', 'Stanza 198']
White count: 12
White Indices
['Stanza 26', 'Stanza 38', 'Stanza 39', 'Stanza 61', 'Stanza 95', 'Stanza 108', 'Stanza 122', 'Stanza 143', 'Stanza 149', 'Stanza 160', 'Stanza 188', 'Stanza 199']
