<a href="https://colab.research.google.com/github/TCU-DCDA/WRIT20833-2025/blob/main/notebooks/exercises/Review_04_Functions_Advanced.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# WRIT 20833 Review 04: Functions & Advanced Processing

Practice writing functions and processing complex cultural datasets.

**Make a copy:** File > Save a copy in Drive

## Exercise 1: Basic Functions
Write simple functions for text analysis.

In [None]:
# Report basic statistics for a single quote
def analyze_quote(text, author="Unknown"):
    """Print a short report about a quote and return its word count.

    Args:
        text: The quote to analyze.
        author: Name shown in the report header; defaults to "Unknown".

    Returns:
        The number of whitespace-separated words in ``text``.
    """
    punctuation_marks = ".,!?;:"
    num_words = len(text.split())
    num_chars = len(text)

    # Becomes True as soon as one of the marks occurs anywhere in the text
    found_punctuation = False
    for mark in punctuation_marks:
        if mark in text:
            found_punctuation = True
            break

    print(f"Quote by {author}:")
    print(f"Text: \"{text}\"")
    print(f"Words: {num_words}, Characters: {num_chars}")
    print(f"Has punctuation: {found_punctuation}")

    # Hand the count back so callers can use it in further processing
    return num_words

# Test the function
# Each call prints its own report; the returned word count is not stored here.
analyze_quote("To be or not to be, that is the question", "Shakespeare")
print()  # blank line to separate the reports
analyze_quote("I have a dream", "Martin Luther King Jr.")
print()
analyze_quote("Hello world")  # No author provided, so the default "Unknown" is used

## Exercise 2: Functions with Return Values
Create functions that calculate and return results.

In [None]:
# Function to calculate reading time
def calculate_reading_time(word_count, words_per_minute=200):
    """Calculate reading time in minutes.

    Args:
        word_count: Number of words in the text.
        words_per_minute: Assumed reading speed; defaults to 200.

    Returns:
        The estimated reading time in minutes, rounded to one decimal place.

    Raises:
        ZeroDivisionError: If ``words_per_minute`` is 0.
    """
    minutes = word_count / words_per_minute
    return round(minutes, 1)

# Map a word count onto a named length tier
def categorize_length(word_count):
    """Return the length category for a text with ``word_count`` words.

    Tiers: fewer than 10 words is "Very Short", fewer than 100 is "Short",
    fewer than 1000 is "Medium", and anything else is "Long".
    """
    # (exclusive upper bound, label), checked from smallest to largest
    tiers = (
        (10, "Very Short"),
        (100, "Short"),
        (1000, "Medium"),
    )
    for upper_bound, label in tiers:
        if word_count < upper_bound:
            return label
    return "Long"

# Function to analyze difficulty
def analyze_difficulty(text):
    """Rate a text by its share of long words.

    A word counts as long when it has more than 6 characters after leading
    and trailing punctuation (``.,!?;:``) is stripped, so that "people," is
    measured as the 6-letter word "people" rather than as 7 characters.

    Args:
        text: The text to rate.

    Returns:
        "Difficult" if more than 30% of the words are long, "Moderate" if
        more than 15% are, otherwise "Easy". Text with no words is "Easy".
    """
    words = text.split()
    if not words:
        return "Easy"

    # Strip attached punctuation before measuring each word
    long_words = sum(1 for word in words if len(word.strip(".,!?;:")) > 6)
    percentage = (long_words / len(words)) * 100

    if percentage > 30:
        return "Difficult"
    elif percentage > 15:
        return "Moderate"
    else:
        return "Easy"

# Test with different texts
# Three samples: a short plain sentence, a sentence of long words, and a historical quote
texts = [
    "The cat sat on the mat",
    "Comprehensive understanding requires methodical examination of multifaceted phenomena",
    "We hold these truths to be self-evident, that all men are created equal"
]

# Run every metric on each sample and print a short report
for text in texts:
    word_count = len(text.split())  # whitespace-separated words
    reading_time = calculate_reading_time(word_count)  # default speed of 200 words per minute
    category = categorize_length(word_count)
    difficulty = analyze_difficulty(text)
    
    print(f"Text: \"{text}\"")
    print(f"Length: {category} ({word_count} words)")
    print(f"Reading time: {reading_time} minutes")
    print(f"Difficulty: {difficulty}")
    print()  # blank line between reports

## Exercise 3: Processing Lists of Data
Use functions to process cultural datasets.

In [None]:
# Function to process a book collection
def process_book_collection(books):
    """Analyze a collection of book dictionaries.

    Prints a summary (book count, total and average pages, oldest and newest
    title) and returns the headline statistics.

    Args:
        books: A list of dictionaries, each with "title", "year" and "pages"
            keys. May be empty.

    Returns:
        A dictionary with "total_books", "avg_pages" and "year_range" (newest
        year minus oldest year). For an empty collection all three are 0.
    """
    total_books = len(books)
    total_pages = sum(book["pages"] for book in books)
    avg_pages = total_pages / total_books if total_books > 0 else 0

    # Find oldest and newest books. min()/max() raise ValueError on an empty
    # sequence, so an empty collection gets placeholder values instead.
    if books:
        oldest = min(books, key=lambda book: book["year"])
        newest = max(books, key=lambda book: book["year"])
        oldest_label = f"{oldest['title']} ({oldest['year']})"
        newest_label = f"{newest['title']} ({newest['year']})"
        year_range = newest["year"] - oldest["year"]
    else:
        oldest_label = newest_label = "N/A"
        year_range = 0

    print("Collection Analysis:")
    print(f"Total books: {total_books}")
    print(f"Total pages: {total_pages:,}")
    print(f"Average pages: {avg_pages:.0f}")
    print(f"Oldest: {oldest_label}")
    print(f"Newest: {newest_label}")

    return {
        "total_books": total_books,
        "avg_pages": avg_pages,
        "year_range": year_range,
    }

# Sample book collection
# One dictionary per book; process_book_collection reads "title", "year" and "pages"
books = [
    {"title": "1984", "author": "George Orwell", "year": 1949, "pages": 328},
    {"title": "Pride and Prejudice", "author": "Jane Austen", "year": 1813, "pages": 432},
    {"title": "The Handmaid's Tale", "author": "Margaret Atwood", "year": 1985, "pages": 311},
    {"title": "Beloved", "author": "Toni Morrison", "year": 1987, "pages": 275}
]

# The call prints the collection report; the returned dictionary feeds the extra line below
stats = process_book_collection(books)
print(f"\nYear span: {stats['year_range']} years")

## Exercise 4: Text Comparison Functions
Compare and analyze multiple texts.

In [None]:
# Function to find common words
def find_common_words(text1, text2, min_length=4):
    """Find words that appear in both texts.

    Words are lowercased and stripped of leading/trailing punctuation
    (``.,!?;:``) before they are compared. The length filter is applied to
    the cleaned word, so "men," is treated as the 3-letter word "men".

    Args:
        text1: First text.
        text2: Second text.
        min_length: Minimum length of a cleaned word for it to be
            considered; defaults to 4.

    Returns:
        An alphabetically sorted list of the words shared by both texts.
    """
    punctuation = ".,!?;:"

    def significant_words(text):
        # Clean first, then filter, so punctuation never counts toward length
        cleaned = (word.lower().strip(punctuation) for word in text.split())
        return {word for word in cleaned if len(word) >= min_length}

    return sorted(significant_words(text1) & significant_words(text2))

# Function to calculate similarity
def calculate_similarity(text1, text2):
    """Calculate basic similarity between texts.

    The score is the share of distinct words the two texts have in common:
    the size of the intersection of their word sets divided by the size of
    the union. Words are lowercased and stripped of leading/trailing
    punctuation (``.,!?;:``) so that "equal," and "equal." count as the
    same word.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The similarity as a percentage rounded to one decimal place, or 0
        when neither text contains any words.
    """
    punctuation = ".,!?;:"

    def word_set(text):
        cleaned = (word.strip(punctuation) for word in text.lower().split())
        # Drop tokens that were nothing but punctuation
        return {word for word in cleaned if word}

    words1 = word_set(text1)
    words2 = word_set(text2)

    intersection = words1 & words2
    union = words1 | words2

    similarity = len(intersection) / len(union) if union else 0
    return round(similarity * 100, 1)

# Function to compare writing styles
def compare_styles(text1, text2, title1="Text 1", title2="Text 2"):
    """Compare basic style metrics of two texts and print them.

    For each text the report shows the word count, the sentence count
    (number of '.', '!' and '?' characters), the average word length with
    surrounding punctuation stripped, and the average sentence length in
    words.

    Args:
        text1: First text.
        text2: Second text.
        title1: Label printed for the first text; defaults to "Text 1".
        title2: Label printed for the second text; defaults to "Text 2".

    Returns:
        None. The comparison is printed.
    """
    def get_stats(text):
        words = text.split()
        sentences = text.count('.') + text.count('!') + text.count('?')
        avg_word_length = sum(len(word.strip(".,!?;:")) for word in words) / len(words) if words else 0
        # With no sentence-ending punctuation, the whole text is one sentence
        avg_sentence_length = len(words) / sentences if sentences > 0 else len(words)

        return {
            "words": len(words),
            "sentences": sentences,
            "avg_word_length": round(avg_word_length, 1),
            "avg_sentence_length": round(avg_sentence_length, 1)
        }

    def print_stats(title, stats):
        print(f"{title}: {stats['words']} words, {stats['sentences']} sentences")
        print(f"  Avg word length: {stats['avg_word_length']} chars")
        print(f"  Avg sentence length: {stats['avg_sentence_length']} words")

    # Compute both sets of metrics before printing anything
    stats1 = get_stats(text1)
    stats2 = get_stats(text2)

    print("Style Comparison:")
    print_stats(title1, stats1)
    print()
    print_stats(title2, stats2)

# Test with historical texts
# Two one-sentence excerpts, labelled "Declaration" and "Gettysburg Address" in the report below
declaration = "We hold these truths to be self-evident, that all men are created equal, that they are endowed by their Creator with certain unalienable Rights, that among these are Life, Liberty and the pursuit of Happiness."

gettysburg = "Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal."

# Shared vocabulary (default min_length of 4) and overall word overlap as a percentage
common_words = find_common_words(declaration, gettysburg)
similarity = calculate_similarity(declaration, gettysburg)

print(f"Common words: {common_words}")
print(f"Similarity: {similarity}%")
print()  # blank line before the style report
compare_styles(declaration, gettysburg, "Declaration", "Gettysburg Address")

## Exercise 5: Your Turn - Custom Functions
Write functions for your research interests.

In [None]:
# TODO: Write a function relevant to your field of study
# Examples: analyze_poem(), categorize_artwork(), process_historical_event()

def analyze_my_data(item, category="general"):
    """Customize this function for your research needs.

    Template: as written it only wraps its arguments in a result dictionary
    with a placeholder analysis string.

    Args:
        item: The thing to analyze (for example a poem, artwork or event).
        category: Label stored alongside the item; defaults to "general".

    Returns:
        A dictionary with "item", "category" and "analysis" keys.
    """

    # TODO: Add your analysis logic here
    # Consider: What would you want to measure or categorize?

    result = {
        "item": item,
        "category": category,
        "analysis": "Complete your analysis here"
    }

    return result

# TODO: Write a function that processes a list of your items
def process_my_collection(items):
    """Process a collection of items from your field.

    Runs analyze_my_data on every item (with its default category) and
    prints one line per item.

    Args:
        items: A list of items to analyze. May be empty.

    Returns:
        A summary string of the form "Processed N items".
    """

    # TODO: Implement collection-level analysis
    total_items = len(items)

    print(f"Processing {total_items} items...")

    for item in items:
        # Process each item
        result = analyze_my_data(item)
        print(f"- {result['item']}: {result['analysis']}")

    return f"Processed {total_items} items"

# TODO: Test your functions
sample_items = ["Item 1", "Item 2", "Item 3"]  # Replace with your data

# Test individual function
# Passes an explicit category instead of relying on the default "general"
result = analyze_my_data("Sample Item", "test category")
print("Individual analysis:", result)
print()

# Test collection function
# Prints one line per item, then returns a summary string
summary = process_my_collection(sample_items)
print("\nSummary:", summary)

## Exercise 6: Advanced Data Processing
Combine multiple functions for complex analysis.

In [None]:
# Complete analysis pipeline for cultural texts
def full_text_analysis(texts_with_info):
    """Complete analysis of multiple texts with metadata.

    Computes per-text metrics, prints a report for every text followed by a
    collection summary, and returns the per-text results.

    Args:
        texts_with_info: A list of dictionaries, each with "text", "title"
            and "author" keys. May be empty.

    Returns:
        A list with one dictionary per text containing "title", "author",
        "word_count", "sentence_count", "avg_word_length",
        "lexical_diversity" (unique words divided by total words) and
        "reading_time" (from calculate_reading_time). Empty input gives an
        empty list.
    """
    punctuation = ".,!?;:"

    def analyze_single_text(text_info):
        text = text_info["text"]
        title = text_info["title"]
        author = text_info["author"]

        # Basic metrics
        words = text.split()
        word_count = len(words)
        # Every '.', '!' or '?' is counted as one sentence ending
        sentence_count = text.count('.') + text.count('!') + text.count('?')

        # Advanced metrics (punctuation is stripped before measuring words)
        avg_word_length = sum(len(word.strip(punctuation)) for word in words) / word_count if words else 0
        unique_words = len({word.lower().strip(punctuation) for word in words})
        lexical_diversity = unique_words / word_count if word_count > 0 else 0

        return {
            "title": title,
            "author": author,
            "word_count": word_count,
            "sentence_count": sentence_count,
            "avg_word_length": round(avg_word_length, 2),
            "lexical_diversity": round(lexical_diversity, 3),
            "reading_time": calculate_reading_time(word_count)
        }

    # Process all texts
    results = [analyze_single_text(text_info) for text_info in texts_with_info]

    # Collection statistics
    total_words = sum(r["word_count"] for r in results)
    avg_words = total_words / len(results) if results else 0
    # default=None keeps max() from raising ValueError on an empty collection
    most_complex = max(results, key=lambda r: r["avg_word_length"], default=None)
    most_diverse = max(results, key=lambda r: r["lexical_diversity"], default=None)
    most_complex_title = most_complex["title"] if most_complex is not None else "N/A"
    most_diverse_title = most_diverse["title"] if most_diverse is not None else "N/A"

    # Display results
    print("COMPLETE TEXT ANALYSIS")
    print("=" * 50)

    for result in results:
        print(f"{result['title']} by {result['author']}")
        print(f"  Words: {result['word_count']}, Sentences: {result['sentence_count']}")
        print(f"  Avg word length: {result['avg_word_length']}")
        print(f"  Lexical diversity: {result['lexical_diversity']}")
        print(f"  Reading time: {result['reading_time']} minutes")
        print()

    print("COLLECTION SUMMARY:")
    print(f"Total texts: {len(results)}")
    print(f"Total words: {total_words:,}")
    print(f"Average words per text: {avg_words:.0f}")
    print(f"Most complex language: {most_complex_title}")
    print(f"Most diverse vocabulary: {most_diverse_title}")

    return results

# Sample texts for analysis
# Each entry carries the "title", "author" and "text" keys that full_text_analysis reads
sample_texts = [
    {
        "title": "Hamlet Soliloquy",
        "author": "Shakespeare",
        "text": "To be, or not to be, that is the question: Whether 'tis nobler in the mind to suffer the slings and arrows of outrageous fortune, or to take arms against a sea of troubles and by opposing end them."
    },
    {
        "title": "I Have a Dream",
        "author": "Martin Luther King Jr.",
        "text": "I have a dream that one day this nation will rise up and live out the true meaning of its creed: We hold these truths to be self-evident, that all men are created equal."
    }
]

# Prints the full report; the per-text result dictionaries are kept for later use
analysis_results = full_text_analysis(sample_texts)

## Summary

You practiced:
- Writing functions with parameters and return values
- Processing collections of cultural data
- Comparing and analyzing multiple texts
- Building complex analysis pipelines
- Creating reusable code for research tasks

**Next:** Review 05 will cover data ethics and collection methods.

