In [2]:
def load_words(filename):
    with open(filename, 'r') as file:
        words = file.read().splitlines()
    return words

words = load_words('../words_alpha.txt')

In [12]:
import re
import nltk
from nltk.corpus import cmudict

nltk.download('cmudict')
d = cmudict.dict()

def count_syllables(word):
    word = word.lower()
    
    # Handling some common exceptions
    exception_add = ['serious', 'crucial', 'table']  # Add 1 syllable
    exception_del = ['fortunately', 'unfortunately']  # Remove 1 syllable
    
    # If word is in exception list, handle accordingly
    if word in exception_add:
        return 3  # Known exception
    if word in exception_del:
        return 4  # Known exception

    # Find vowel groups
    syllable_count = len(re.findall(r'[aeiouy]+', word))

    # Subtract for silent 'e' cases (but handle exceptions)
    if word.endswith('e') and not re.search(r'[aeiouy]{2}', word[-3:]) and len(word) > 2 and word[-2:] != 'le':
        syllable_count -= 1

    # Handle diphthongs (e.g., "ai", "au", "ei")
    diphthong_patterns = ['ai', 'au', 'ea', 'ei', 'ou', 'ie']
    for pattern in diphthong_patterns:
        syllable_count -= len(re.findall(pattern, word))

    # Add for "le" endings if preceded by a consonant (e.g., "table", "bottle")
    if word.endswith("le") and len(word) > 2 and word[-3] not in 'aeiouy':
        syllable_count += 1
    
    # Ensure at least 1 syllable (minimum constraint)
    return max(1, syllable_count)


def count_syllables_dict(word):
    word = word.lower()
    if word in d:
        # Count the syllables in the phonetic transcription
        return [len([y for y in x if y[-1].isdigit()]) for x in d[word]][0]
    else:
        # Fall back to your heuristic for unknown words
        return count_syllables(word)

words_to_test = [
    # Silent "e" and Irregular Endings
    "fire", "hire", "acre", "table",
    
    # Diphthongs and Triphthongs
    "audio", "queue", "chaos", "poetry",
    
    # Consonant Clusters
    "rural", "squirrel",
    
    # Uncommon Vowel Combinations
    "colonel", "choir", "hour",
    
    # Words with Uncommon Endings
    "people", "apple", "ballet",
    
    # Prefix and Suffix Issues
    "reenter", "preexisting", "misunderstood",
    
    # Short Irregular Words
    "eye", "i", "you",
    
    # Hyphenated and Compound Words
    "mother-in-law", "check-in", "high-school",
    
    # Foreign Borrowings
    "genre", "debris", "faux", "bureau",
    
    # Special Cases and Exceptions
    "one", "wednesday", "clothes", "every"
]

for word in words_to_test:
    print(f"Word: {word}, Syllables: {count_syllables(word)}")

[nltk_data] Downloading package cmudict to
[nltk_data]     C:\Users\Lardex\AppData\Roaming\nltk_data...
[nltk_data]   Package cmudict is already up-to-date!


Word: fire, Syllables: 1
Word: hire, Syllables: 1
Word: acre, Syllables: 1
Word: table, Syllables: 3
Word: audio, Syllables: 1
Word: queue, Syllables: 1
Word: chaos, Syllables: 1
Word: poetry, Syllables: 2
Word: rural, Syllables: 2
Word: squirrel, Syllables: 2
Word: colonel, Syllables: 3
Word: choir, Syllables: 1
Word: hour, Syllables: 1
Word: people, Syllables: 3
Word: apple, Syllables: 3
Word: ballet, Syllables: 2
Word: reenter, Syllables: 2
Word: preexisting, Syllables: 3
Word: misunderstood, Syllables: 4
Word: eye, Syllables: 1
Word: i, Syllables: 1
Word: you, Syllables: 1
Word: mother-in-law, Syllables: 4
Word: check-in, Syllables: 2
Word: high-school, Syllables: 2
Word: genre, Syllables: 1
Word: debris, Syllables: 2
Word: faux, Syllables: 1
Word: bureau, Syllables: 1
Word: one, Syllables: 1
Word: wednesday, Syllables: 3
Word: clothes, Syllables: 2
Word: every, Syllables: 3


In [None]:
import pyodbc as odbc
import os
from dotenv import load_dotenv

load_dotenv()
SERVER = os.getenv('SERVER')
DATABASE = os.getenv('DATABASE')
# PASS = os.getenv('PASSWORD')
# USER = os.getenv('USER')

connectionString = f"""
DRIVER={{SQL Server}};
SERVER={SERVER};
DATABASE={DATABASE};
Trusted_Connection=yes;
"""
# pwd={PASS};
# uid={USER};
# Connect to SQL Server database
conn = odbc.connect(connectionString)
print(conn)