<a href="https://colab.research.google.com/github/Scodingcurriculum/G78-AI-2025/blob/main/98P_AI_C78_L07.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# =====================================
# Lesson 7: AI Text Analyzer
# =====================================

# 🧠 AI Concept:
# Simulates how AI systems perform basic NLP by tokenizing text
# and counting word frequencies to reveal patterns in language.

import string  # For punctuation removal

# =========================
# Section 1: Introduction
# =========================
# Greet the user and explain what the program is about to do.
print("📊 Welcome to the AI Text Analyzer!")
print("Enter a paragraph, and I'll show you how often each word appears.\n")

# =========================
# Section 2: Input & Preprocessing
# =========================
# Read the paragraph, then trim the whitespace around it.
paragraph = input("👉 Please enter your paragraph:\n> ")
paragraph = paragraph.strip()

# Lowercase everything so "The" and "the" are counted as the same word.
paragraph_lower = paragraph.lower()

# Build a table that maps every character in string.punctuation to None;
# str.translate() deletes any character that is mapped to None.
translator = dict.fromkeys(map(ord, string.punctuation))
clean_paragraph = paragraph_lower.translate(translator)

# =========================
# Section 3: Tokenization
# =========================
# split() with no arguments breaks the text on any run of whitespace.
words = clean_paragraph.split()

# =========================
# Section 4: Additional Activity
#   Minimum Word Length Filter
# =========================

# Ask user for the minimum word length (Recap: using variables & input).
# int() raises ValueError for anything that is not a whole number
# (for example "three", "3.5", or just pressing Enter), so keep asking
# until the conversion succeeds instead of letting the program crash.
while True:
    raw_min_length = input("\n🔎 Enter minimum word length to include (e.g., 3): ").strip()
    try:
        min_length = int(raw_min_length)
    except ValueError:
        print("⚠️ Please type a whole number, such as 3.")
    else:
        break

# Prepare to track ignored short words
short_words = []

# =========================
# Section 5: Count with Filter
# =========================
word_freq = {}  # Maps each kept word to the number of times it appeared

for word in words:
    if len(word) >= min_length:
        # Core logic: the word is long enough, so add one to its count
        # (get() supplies 0 the first time a word is seen).
        word_freq[word] = word_freq.get(word, 0) + 1
    else:
        # (Additional Activity) Too short: remember it for the report
        # instead of counting it.
        short_words.append(word)

# =========================
# Section 6: Display Results
# =========================

# 6a) Report on ignored short words
print(f"\n🚫 Ignored words shorter than {min_length} characters:")
if short_words:
    # (Additional Activity) Build skip-frequency for reporting, so a word
    # that was skipped several times is listed once with its count.
    skip_freq = {}
    for w in short_words:
        skip_freq[w] = skip_freq.get(w, 0) + 1
    for w, count in skip_freq.items():
        print(f" • '{w}': skipped {count} time(s)")
else:
    print(" • None — all words met the length requirement.")

# 6b) Display filtered word frequencies
print(f"\n📈 Word Frequency (words ≥ {min_length} letters):")
print("-" * 40)
if word_freq:
    # Most frequent first. sorted() is stable, so words with the same
    # count stay in the order they first appeared in the paragraph.
    for word, freq in sorted(word_freq.items(), key=lambda x: x[1], reverse=True):
        print(f"{word:15} → {freq} time(s)")
else:
    # Nothing was counted: the paragraph was blank, or every word was
    # shorter than min_length. Say so rather than showing an empty table.
    print(" • None — no words met the length requirement.")

# =========================
# Section 7: Recap of Concepts Used
# =========================
print("\n✅ You practiced:")

# One bullet per concept, printed in the order listed here.
recap_points = (
    "- String methods: .lower(), .translate(), .split()",
    "- Loops and conditional checks inside dictionary logic",
    "- Dictionary updates for counting frequencies",
    "- Building and reporting a skip-frequency dictionary",
    "- Sorting dictionary items before display",
)
for point in recap_points:
    print(point)

# Closing divider line.
print("=" * 50)


📊 Welcome to the AI Text Analyzer!
Enter a paragraph, and I'll show you how often each word appears.

👉 Please enter your paragraph:
> this is india and we are loving it

🔎 Enter minimum word length to include (e.g., 3): 3

🚫 Ignored words shorter than 3 characters:
 • 'is': skipped 1 time(s)
 • 'we': skipped 1 time(s)
 • 'it': skipped 1 time(s)

📈 Word Frequency (words ≥ 3 letters):
----------------------------------------
this            → 1 time(s)
india           → 1 time(s)
and             → 1 time(s)
are             → 1 time(s)
loving          → 1 time(s)

✅ You practiced:
- String methods: .lower(), .translate(), .split()
- Loops and conditional checks inside dictionary logic
- Dictionary updates for counting frequencies
- Building and reporting a skip-frequency dictionary
- Sorting dictionary items before display
