In [1]:
def load_words(filename, encoding='utf-8'):
    """Read a text file and return its lines as a list of strings.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        A list with one string per line, line terminators removed. An empty
        file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid in ``encoding``.
    """
    with open(filename, 'r', encoding=encoding) as file:
        return file.read().splitlines()

# Read the word list (one entry per line) from ../words_alpha.txt. The path is
# relative, so it is resolved against the current working directory.
words = load_words('../words_alpha.txt')

In [5]:
import nltk
import re
from nltk.corpus import cmudict

# Download the CMU Pronouncing Dictionary corpus for NLTK. This call runs every
# time the cell is executed; when the corpus is already installed NLTK only
# reports that the package is up-to-date.
nltk.download('cmudict')

# Load the CMU Pronouncing Dictionary once so later lookups are plain mapping
# accesses. As it is used below: it is indexed by (lowercased) word, each value
# is a sequence of pronunciations, and each pronunciation is a sequence of
# phoneme strings in which vowel sounds end in a digit.
cmu_dict = cmudict.dict()

def count_syllables_dict(word):
    """Count syllables using the CMU Pronouncing Dictionary.

    The word is lowercased and looked up in ``cmu_dict``. When an entry
    exists, the result is the number of phonemes in its first listed
    pronunciation that end in a digit (the vowel sounds); alternative
    pronunciations are ignored. Words that are missing from the dictionary,
    or whose entry has no pronunciations, are passed to
    ``count_syllables_heuristic``.

    Args:
        word: The word to analyse; case is ignored.

    Returns:
        The syllable count as an int.
    """
    word = word.lower()
    pronunciations = cmu_dict.get(word)
    if not pronunciations:
        # Fallback to the heuristic method if the word is not in the dictionary
        return count_syllables_heuristic(word)

    # Only the first pronunciation decides the result, so the alternatives
    # are never scanned.
    first_pronunciation = pronunciations[0]
    return sum(1 for phoneme in first_pronunciation if phoneme[-1].isdigit())

def count_syllables_heuristic(word):
    """Fallback heuristic for counting syllables when the word is not in the dictionary.

    The estimate is the number of maximal runs of consecutive vowel letters
    (a, e, i, o, u, y). A digraph such as "ai", "ou" or "ie" is therefore
    already a single unit and must not be subtracted a second time. One
    adjustment is applied afterwards: a final "e" that directly follows a
    consonant is treated as silent ("make", "whale"), unless the word ends in
    consonant + "le" ("table", "bottle"), where that ending forms a syllable
    of its own and its "e" has already been counted.

    Args:
        word: The word to analyse; case is ignored.

    Returns:
        The estimated syllable count as an int, never less than 1 (an empty
        string or a word without vowel letters also yields 1).
    """
    vowels = 'aeiouy'
    word = word.lower()
    syllable_count = len(re.findall(r'[aeiouy]+', word))

    # Words of one or two letters ("be", "me") keep their final "e"; a final
    # "e" preceded by a vowel ("tree", "movie") belongs to a vowel run that
    # was counted once and is left alone.
    if len(word) > 2 and word.endswith('e') and word[-2] not in vowels:
        # Consonant + "le" is pronounced as its own syllable, so its "e" group
        # stays in the count; every other consonant + "e" ending is silent.
        syllabic_le = word[-2] == 'l' and word[-3] not in vowels
        if not syllabic_le:
            syllable_count -= 1

    return max(1, syllable_count)

# Spot-check words, keyed by the pronunciation quirk each group exercises.
test_words_by_category = {
    'Silent "e" and Irregular Endings': ["fire", "hire", "acre", "table"],
    'Diphthongs and Triphthongs': ["audio", "queue", "chaos", "poetry"],
    'Consonant Clusters': ["rural", "squirrel"],
    'Uncommon Vowel Combinations': ["colonel", "choir", "hour"],
    'Words with Uncommon Endings': ["people", "apple", "ballet"],
    'Prefix and Suffix Issues': ["reenter", "preexisting", "misunderstood"],
    'Short Irregular Words': ["eye", "i", "you"],
    'Hyphenated and Compound Words': ["mother-in-law", "check-in", "high-school"],
    'Foreign Borrowings': ["genre", "debris", "faux", "bureau"],
    'Special Cases and Exceptions': ["one", "wednesday", "clothes", "every"],
}

# Flatten the groups into one list, keeping the category order above.
words_to_test = [
    entry
    for group in test_words_by_category.values()
    for entry in group
]

# Print the syllable count reported for every spot-check word.
for word in words_to_test:
    print(f"Word: {word}, Syllables: {count_syllables_dict(word)}")

[nltk_data] Downloading package cmudict to
[nltk_data]     C:\Users\Lardex\AppData\Roaming\nltk_data...
[nltk_data]   Package cmudict is already up-to-date!


Word: fire, Syllables: 2
Word: hire, Syllables: 2
Word: acre, Syllables: 2
Word: table, Syllables: 2
Word: audio, Syllables: 3
Word: queue, Syllables: 1
Word: chaos, Syllables: 2
Word: poetry, Syllables: 3
Word: rural, Syllables: 2
Word: squirrel, Syllables: 2
Word: colonel, Syllables: 2
Word: choir, Syllables: 2
Word: hour, Syllables: 2
Word: people, Syllables: 2
Word: apple, Syllables: 2
Word: ballet, Syllables: 2
Word: reenter, Syllables: 3
Word: preexisting, Syllables: 4
Word: misunderstood, Syllables: 4
Word: eye, Syllables: 1
Word: i, Syllables: 1
Word: you, Syllables: 1
Word: mother-in-law, Syllables: 4
Word: check-in, Syllables: 2
Word: high-school, Syllables: 2
Word: genre, Syllables: 2
Word: debris, Syllables: 2
Word: faux, Syllables: 1
Word: bureau, Syllables: 2
Word: one, Syllables: 1
Word: wednesday, Syllables: 2
Word: clothes, Syllables: 1
Word: every, Syllables: 3


In [None]:
import pyodbc as odbc
import os
from dotenv import load_dotenv

# Pull connection settings from the environment; load_dotenv() (python-dotenv)
# first loads variables from a local .env file, if one is present.
load_dotenv()
SERVER = os.getenv('SERVER')
DATABASE = os.getenv('DATABASE')
# PASS = os.getenv('PASSWORD')
# USER = os.getenv('USER')

# os.getenv returns None for an unset variable, which the f-string below would
# silently render as "SERVER=None" / "DATABASE=None". Fail fast with a clear
# message instead of an obscure driver error.
_missing_settings = [
    name
    for name, value in (('SERVER', SERVER), ('DATABASE', DATABASE))
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
    )

# Windows integrated authentication (Trusted_Connection=yes); the commented
# uid/pwd lines are the SQL-login alternative.
connectionString = f"""
DRIVER={{SQL Server}};
SERVER={SERVER};
DATABASE={DATABASE};
Trusted_Connection=yes;
"""
# pwd={PASS};
# uid={USER};
# Connect to SQL Server database
conn = odbc.connect(connectionString)
print(conn)