<a href="https://colab.research.google.com/github/TCU-DCDA/WRIT20833-2025/blob/main/notebooks/exercises/Review_04_Functions_Advanced.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# WRIT 20833 Review 04: Functions & Advanced Processing

Practice writing functions and processing complex cultural datasets.

**Make a copy:** File > Save a copy in Drive

## Exercise 1: Basic Functions
Write simple functions for text analysis.

In [None]:
# Report basic size metrics for a quote
def analyze_quote(text, author="Unknown"):
    """Print a short report about a quote and return its word count.

    Args:
        text: The quote itself. Words are whatever ``str.split()`` yields,
            i.e. runs of non-whitespace characters.
        author: Name shown in the report header; defaults to "Unknown".

    Returns:
        The number of words in ``text``.
    """
    n_words = len(text.split())
    n_chars = len(text)  # counts every character, spaces and punctuation included

    print(f"Quote by {author}:")
    print(f'Text: "{text}"')
    print(f"Words: {n_words}, Characters: {n_chars}")

    # Hand the count back so callers can do further processing with it
    return n_words

# Test the function
# Each call prints a three-line report; the returned word count is not stored here.
analyze_quote("To be or not to be, that is the question", "Shakespeare")
print()  # blank line between reports
analyze_quote("I have a dream", "Martin Luther King Jr.")
print()  # blank line between reports
analyze_quote("Hello world")  # No author provided, so the default "Unknown" is used

## Exercise 2: Functions with Return Values
Create functions that calculate and return results.

In [None]:
# Estimate how long a text takes to read
def calculate_reading_time(word_count, words_per_minute=200):
    """Return the estimated reading time in minutes, rounded to one decimal.

    Args:
        word_count: Number of words in the text.
        words_per_minute: Assumed reading speed; defaults to 200.

    Returns:
        ``word_count / words_per_minute`` rounded to one decimal place.
    """
    return round(word_count / words_per_minute, 1)

# Bucket a word count into a named length category
def categorize_length(word_count):
    """Return the length label for a text with ``word_count`` words.

    Labels and their ranges:
        "Very Short"  fewer than 10 words
        "Short"       10 to 99 words
        "Medium"      100 to 999 words
        "Long"        1000 words or more
    """
    # (exclusive upper bound, label) pairs, checked from smallest to largest
    size_bands = (
        (10, "Very Short"),
        (100, "Short"),
        (1000, "Medium"),
    )
    for upper_bound, label in size_bands:
        if word_count < upper_bound:
            return label
    return "Long"

# Function to analyze difficulty (simplified)
def analyze_difficulty(text):
    """Rate a text as "Easy", "Moderate" or "Difficult" by its share of long words.

    A word counts as long when it has more than 6 characters once leading and
    trailing punctuation is removed, so "friend." is measured as "friend"
    (6 characters, not long) rather than as 7 characters.

    Args:
        text: The text to rate; words are separated by whitespace.

    Returns:
        "Difficult" if more than 30% of the words are long, "Moderate" if
        more than 15% are, otherwise "Easy". An empty text is "Easy".
    """
    words = text.split()
    if not words:
        # No words means no long words; also avoids dividing by zero
        return "Easy"

    # Punctuation attached to a word must not inflate its length
    punctuation = ".,!?;:\"'()"
    long_words = sum(1 for word in words if len(word.strip(punctuation)) > 6)
    percentage = long_words / len(words) * 100

    if percentage > 30:
        return "Difficult"
    if percentage > 15:
        return "Moderate"
    return "Easy"

# Run each sample sentence through the three helpers and report the results
texts = [
    "The cat sat on the mat",
    "Understanding requires examination of phenomena",
    "We hold these truths to be self-evident",
]

for text in texts:
    word_count = len(text.split())
    reading_time = calculate_reading_time(word_count)
    category = categorize_length(word_count)
    difficulty = analyze_difficulty(text)

    # One report per text, followed by a blank separator line
    print(
        f'Text: "{text}"',
        f"Length: {category} ({word_count} words)",
        f"Reading time: {reading_time} minutes",
        f"Difficulty: {difficulty}",
        sep="\n",
        end="\n\n",
    )

## Exercise 3: Processing Lists of Data
Use functions to process cultural datasets.

In [None]:
# Function to process a book collection (simplified)
def process_book_collection(books):
    """Print summary statistics for a list of book dictionaries and return them.

    Args:
        books: List of dicts, each providing "title", "year" and "pages".
            Any other keys (such as "author") are ignored.

    Returns:
        A dict with:
            "total_books": number of books,
            "avg_pages":   mean page count (not rounded),
            "year_range":  newest publication year minus the oldest.
        An empty collection yields 0 for all three values instead of
        raising IndexError.
    """
    total_books = len(books)
    total_pages = sum(book["pages"] for book in books)
    avg_pages = total_pages / total_books if total_books > 0 else 0

    print("Collection Analysis:")
    print("Total books: " + str(total_books))
    print("Total pages: " + str(total_pages))
    print("Average pages: " + str(int(avg_pages)))  # int() drops the fraction

    if total_books == 0:
        # Nothing to compare, so there is no oldest or newest book to show
        print("Oldest: n/a")
        print("Newest: n/a")
        year_range = 0
    else:
        # On ties min()/max() keep the first matching book in the list
        oldest_book = min(books, key=lambda book: book["year"])
        newest_book = max(books, key=lambda book: book["year"])
        print("Oldest: " + oldest_book["title"] + " (" + str(oldest_book["year"]) + ")")
        print("Newest: " + newest_book["title"] + " (" + str(newest_book["year"]) + ")")
        year_range = newest_book["year"] - oldest_book["year"]

    return {
        "total_books": total_books,
        "avg_pages": avg_pages,
        "year_range": year_range,
    }

# Three novels used to try out process_book_collection
books = [
    {
        "title": "1984",
        "author": "George Orwell",
        "year": 1949,
        "pages": 328,
    },
    {
        "title": "Pride and Prejudice",
        "author": "Jane Austen",
        "year": 1813,
        "pages": 432,
    },
    {
        "title": "The Handmaid's Tale",
        "author": "Margaret Atwood",
        "year": 1985,
        "pages": 311,
    },
]

stats = process_book_collection(books)
print(f"\nYear span: {stats['year_range']} years")

## Exercise 4: Text Comparison Functions
Compare and analyze multiple texts.

In [None]:
# Function to find common words (simplified)
def find_common_words(text1, text2):
    """Find the longer words that appear in both texts.

    Comparison is case-insensitive and ignores punctuation attached to the
    start or end of a word, so "dream." in one text matches "dream" in the
    other. Words shorter than 4 characters are skipped.

    Args:
        text1: First text; it determines the order of the result.
        text2: Second text.

    Returns:
        List of lowercase words found in both texts, without duplicates,
        in the order they first appear in ``text1``.
    """
    punctuation = ".,!?;:\"'()"
    words1 = [word.strip(punctuation) for word in text1.lower().split()]
    # A set makes each "is it in text2?" check fast
    words2 = {word.strip(punctuation) for word in text2.lower().split()}

    common = []
    already_added = set()
    for word in words1:
        if len(word) >= 4 and word in words2 and word not in already_added:
            already_added.add(word)
            common.append(word)

    return common

# Function to calculate basic similarity
def calculate_similarity(text1, text2):
    """Calculate a basic word-overlap similarity between two texts.

    The score is ``2 * shared / (len(words1) + len(words2)) * 100`` where
    ``shared`` counts each word only as often as it occurs in BOTH texts.
    This keeps the score between 0 and 100 and makes it independent of the
    argument order; counting every repeat in ``text1`` could push the
    result above 100%.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        Similarity as a percentage rounded to one decimal place, or 0 when
        both texts contain no words. Comparison is case-insensitive.
    """
    from collections import Counter  # local import keeps this function self-contained

    words1 = text1.lower().split()
    words2 = text2.lower().split()

    total_words = len(words1) + len(words2)
    if total_words == 0:
        return 0

    # Counter & Counter keeps, per word, the smaller of the two counts
    shared = Counter(words1) & Counter(words2)
    common_count = sum(shared.values())

    similarity = (common_count * 2) / total_words * 100
    return round(similarity, 1)

# Side-by-side word and sentence counts for two texts (simplified)
def compare_styles(text1, text2, title1="Text 1", title2="Text 2"):
    """Print word and sentence counts for two texts.

    Sentences are estimated by counting '.', '!' and '?' characters; a text
    containing none of them is reported as one sentence.

    Args:
        text1: First text.
        text2: Second text.
        title1: Label printed for the first text.
        title2: Label printed for the second text.

    Returns:
        None. The comparison is only printed.
    """

    def describe(label, text):
        # Build the one-line summary for a single text
        n_words = len(text.split())
        n_sentences = sum(text.count(mark) for mark in ".!?") or 1
        return f"{label}: {n_words} words, {n_sentences} sentences"

    # Compute both summaries before printing anything
    first_line = describe(title1, text1)
    second_line = describe(title2, text2)

    print("Style Comparison:")
    print(first_line)
    print(second_line)

# Try the comparison helpers on two historical texts
declaration = "We hold these truths to be self-evident, that all men are created equal."
gettysburg = "Four score and seven years ago our fathers brought forth a new nation."

common_words = find_common_words(declaration, gettysburg)
similarity = calculate_similarity(declaration, gettysburg)

print(f"Common words: {common_words}")
print(f"Similarity: {similarity}%", end="\n\n")  # extra newline leaves a blank line
compare_styles(declaration, gettysburg, "Declaration", "Gettysburg Address")

## Exercise 5: Your Turn - Custom Functions
Write functions for your research interests.

In [None]:
# TODO: Write a function relevant to your field of study
# Examples: analyze_poem(), categorize_artwork(), process_historical_event()

def analyze_my_data(item, category="general"):
    """Starter template: wrap one item and its category in a result record.

    Args:
        item: The thing being analyzed; it is stored in the result unchanged.
        category: Label for the item; defaults to "general".

    Returns:
        Dict with the keys "item", "category" and "analysis". Until you add
        your own logic, "analysis" holds a placeholder message.
    """

    # TODO: Add your analysis logic here
    # Consider: What would you want to measure or categorize?

    result = dict(
        item=item,
        category=category,
        analysis="Complete your analysis here",
    )

    return result

# TODO: Write a function that processes a list of your items
def process_my_collection(items):
    """Process a collection of items from your field.

    Runs ``analyze_my_data`` on every item and prints one line per result.
    Items are converted to text for display, so numbers, tuples or dicts
    work as well as strings.

    Args:
        items: A list (or other sized collection) of items to analyze.

    Returns:
        A summary string of the form "Processed N items".
    """

    # TODO: Implement collection-level analysis
    total_items = len(items)

    print(f"Processing {total_items} items...")

    for item in items:
        # Process each item
        result = analyze_my_data(item)
        # The f-string converts non-string values itself, where "+" would
        # raise TypeError for anything that is not already a string
        print(f"- {result['item']}: {result['analysis']}")

    return f"Processed {total_items} items"

# TODO: Test your functions
sample_items = ["Item 1", "Item 2", "Item 3"]  # Replace with your data

# Try the single-item function first
result = analyze_my_data("Sample Item", "test category")
print(f"Individual analysis: {result}", end="\n\n")  # extra newline leaves a blank line

# Then run the whole collection through
summary = process_my_collection(sample_items)
print(f"\nSummary: {summary}")

## Exercise 6: Advanced Data Processing
Combine multiple functions for complex analysis.

In [None]:
# Complete analysis pipeline for cultural texts (simplified)
def full_text_analysis(texts_with_info):
    """Analyze several texts with metadata, print a report and return the results.

    Args:
        texts_with_info: Iterable of dicts, each providing the keys "text",
            "title" and "author".

    Returns:
        List with one dict per text, holding "title", "author",
        "word_count", "sentence_count", "avg_word_length" (rounded to two
        decimals) and "reading_time" (from ``calculate_reading_time``).
    """

    def analyze_single_text(text_info):
        """Compute the metrics for one text record."""
        text = text_info["text"]
        title = text_info["title"]
        author = text_info["author"]

        # Basic metrics
        words = text.split()
        word_count = len(words)
        # Sentences are estimated by counting sentence-ending punctuation
        sentence_count = sum(text.count(mark) for mark in ".!?")

        # Average word length, ignoring punctuation attached to a word
        total_chars = sum(len(word.strip(".,!?;:")) for word in words)
        avg_word_length = total_chars / word_count if word_count > 0 else 0

        return {
            "title": title,
            "author": author,
            "word_count": word_count,
            "sentence_count": sentence_count,
            "avg_word_length": round(avg_word_length, 2),
            "reading_time": calculate_reading_time(word_count),
        }

    # Process all texts
    results = [analyze_single_text(text_info) for text_info in texts_with_info]

    # Collection statistics
    total_words = sum(result["word_count"] for result in results)
    avg_words = total_words / len(results) if results else 0

    # Display results
    print("COMPLETE TEXT ANALYSIS")
    print("=" * 50)

    for result in results:
        print(f"{result['title']} by {result['author']}")
        print(f"  Words: {result['word_count']}, Sentences: {result['sentence_count']}")
        print(f"  Avg word length: {result['avg_word_length']}")
        print(f"  Reading time: {result['reading_time']} minutes")
        print()

    print("Collection Summary:")
    print(f"Total texts: {len(results)}")
    print(f"Average words per text: {round(avg_words, 0)}")

    return results

# Two short records for trying out the pipeline
sample_texts = [
    {
        "title": "Sample Poem",
        "author": "Test Author",
        "text": "This is a sample text for analysis. It has multiple sentences.",
    },
    {
        "title": "Another Text",
        "author": "Another Author",
        "text": "Here is another sample. We can compare them.",
    },
]

# Analyze both records and keep the per-text results
analysis_results = full_text_analysis(sample_texts)

## Summary

You practiced:
- Writing functions with parameters and return values
- Processing collections of cultural data
- Comparing and analyzing multiple texts
- Building complex analysis pipelines
- Creating reusable code for research tasks

**Next:** Review 05 will cover data ethics and collection methods.

