# Bilingual NLP Toolkit - Complete Tutorial

This notebook demonstrates the capabilities of the **bilingual** package for Bangla and English NLP tasks.

## Table of Contents
1. Installation & Setup
2. Text Normalization
3. Language Detection
4. Readability Analysis
5. Literary Analysis
6. Poetic Meter Detection
7. Style Transfer
8. Dataset Operations
9. Advanced Example: Complete Text Analysis

## 1. Installation & Setup

First, let's install the package and import necessary modules.

In [None]:
# Install the package in editable mode from the parent directory (uncomment if needed).
# The %pip magic is preferred over !pip because it installs into the environment
# of the running kernel rather than whatever `pip` is first on the shell PATH.
# %pip install -e ..

# Import core modules
from bilingual import bilingual_api as bb
from bilingual.normalize import normalize_text, detect_language
from bilingual.data_utils import BilingualDataset
from bilingual.modules.literary_analysis import metaphor_detector, simile_detector, tone_classifier
from bilingual.modules.poetic_meter import detect_meter
from bilingual.modules.style_transfer_gan import StyleTransferModel

import warnings
# Suppress all warning categories from this point on so the tutorial output
# stays uncluttered. Warnings raised by the imports above are not affected.
# NOTE(review): this also hides deprecation warnings from later cells; narrow
# the filter (category=/module=) if those should remain visible.
warnings.filterwarnings('ignore')

# Reaching this line means every import above resolved.
print("✓ All modules imported successfully!")

## 2. Text Normalization

Text normalization cleans and standardizes text for processing.

In [None]:
# Bangla sample: the irregular inner spacing and trailing blanks are deliberate,
# they are what the normalizer is expected to clean up.
text_bn = "আমি   স্কুলে যাচ্ছি।  "
normalized_bn = bb.normalize_text(text_bn, lang="bn")

# One print call per language; the trailing empty string produces the blank
# separator line between the two reports.
print(
    "Bangla Normalization:",
    f"  Original:   '{text_bn}'",
    f"  Normalized: '{normalized_bn}'",
    "",
    sep="\n",
)

# English sample with the same kind of whitespace noise.
text_en = "I am   going to school.  "
normalized_en = bb.normalize_text(text_en, lang="en")

print(
    "English Normalization:",
    f"  Original:   '{text_en}'",
    f"  Normalized: '{normalized_en}'",
    sep="\n",
)

## 3. Language Detection

Automatically detect whether text is Bangla or English.

In [None]:
# Three probes: Bangla only, English only, and a sentence mixing both scripts.
test_texts = [
    "আমি বাংলায় কথা বলি।",
    "I speak English.",
    "আমি school যাই।"
]

print("Language Detection Results:")
for sample in test_texts:
    # Detect first, then report, so nothing is printed for a sample that fails.
    detected = detect_language(sample)
    print(f"Text: {sample}", f"  → Detected: {detected}\n", sep="\n")

## 4. Readability Analysis

Analyze text complexity and determine appropriate reading level.

In [None]:
# (sentence, language code) pairs fed to the readability checker.
texts_to_analyze = [
    ("আমি স্কুলে যাই।", "bn"),
    ("I go to school.", "en"),
]

print("Readability Analysis:")
for sentence, lang_code in texts_to_analyze:
    report = bb.readability_check(sentence, lang=lang_code)
    # The report is read as a mapping with 'level' and 'score' entries;
    # the score is shown with two decimals.
    print(
        f"Text: {sentence}",
        f"  Level: {report['level']}",
        f"  Score: {report['score']:.2f}\n",
        sep="\n",
    )

## 5. Literary Analysis

Detect literary devices like metaphors and similes, and analyze tone.

In [None]:
# Metaphor Detection
text = "Life is a journey through time."
metaphors = metaphor_detector(text)
print(f"Text: {text}")

if len(metaphors) == 0:
    # Report an empty result explicitly; otherwise the cell prints only the
    # input line and the reader cannot tell whether detection ran at all.
    print("  → No metaphors found")
else:
    # Each match is read as a mapping with the matched span under 'text'
    # and its category under 'type'.
    for m in metaphors:
        print(f"  → Found: '{m['text']}' (type: {m['type']})")

In [None]:
# Tone Classification
tone_texts = [
    "This is wonderful! I love it!",
    "আমি খুব খুশি এবং আনন্দিত।"
]

for sentence in tone_texts:
    scores = tone_classifier(sentence)
    # The label with the highest score wins; on a tie max() keeps the first
    # pair it encounters in the mapping's iteration order.
    dominant, dominant_score = max(scores.items(), key=lambda pair: pair[1])
    print(f"Text: {sentence}")
    print(f"  Tone: {dominant.upper()} (P:{scores['positive']:.2f}, N:{scores['neutral']:.2f}, Neg:{scores['negative']:.2f})\n")

## 6. Poetic Meter Detection

Analyze syllable patterns and detect poetic meter in text.

In [None]:
# English Poetry: two lines joined by a single newline.
english_poem = (
    "Shall I compare thee to a summer's day?\n"
    "Thou art more lovely and more temperate."
)

meter = detect_meter(english_poem, language='english')

# Overall summary first, then one row per analysed line.
print(
    f"Language: {meter['language']}",
    f"Pattern: {meter['pattern']}",
    f"Avg Syllables/Line: {meter['summary']['avg_units_per_line']}",
    sep="\n",
)
for entry in meter['lines']:
    print(f"  Line {entry['line_number']}: {entry['total_syllables']} syllables")

## 7. Style Transfer

Convert text between formal, informal, and poetic styles.

In [None]:
# Build the rule-based style transfer model and call load() before using it.
style_model = StyleTransferModel(model_type='rule_based')
style_model.load()

# Source sentence with contractions, converted to the formal style.
text_informal = "I can't do this and I won't try it."
text_formal = style_model.convert(text_informal, target_style='formal')

# Show input and output side by side.
print(
    f"Informal: {text_informal}",
    f"Formal:   {text_formal}",
    sep="\n",
)

## 8. Dataset Operations

Work with bilingual datasets efficiently.

In [None]:
# Create a sample dataset: two Bangla and two English records, interleaved.
data = [
    {"text": sentence, "lang": code}
    for sentence, code in (
        ("আমি স্কুলে যাই।", "bn"),
        ("I go to school.", "en"),
        ("আমি বই পড়ি।", "bn"),
        ("I read books.", "en"),
    )
]

dataset = BilingualDataset(data=data)
print(f"Total samples: {len(dataset)}")

# Filter by language: keep only the records tagged as Bangla.
bn_dataset = dataset.filter(lambda record: record["lang"] == "bn")
print(f"Bangla samples: {len(bn_dataset)}")

## 9. Advanced Example: Complete Text Analysis

Combine multiple features for comprehensive analysis.

In [None]:
def analyze_text_complete(text: str):
    """Print a five-step analysis report for ``text``.

    The steps run in order: language detection, normalization, readability,
    literary-device counts (metaphors and similes) and dominant tone. Each
    step prints its own numbered line, framed by two 60-character rules.

    Args:
        text: The sentence or passage to analyse.

    Returns:
        None. The report is written to standard output only.
    """
    rule = "=" * 60
    print(f"Analyzing: '{text}'")
    print(rule)

    # The detected language drives the normalization and readability steps.
    detected = detect_language(text)
    print(f"1. Language: {detected}")

    cleaned = normalize_text(text, lang=detected)
    print(f"2. Normalized: '{cleaned}'")

    # Readability is measured on the raw input, not on the normalized form.
    report = bb.readability_check(text, lang=detected)
    print(f"3. Readability: {report['level']} (Score: {report['score']:.2f})")

    metaphor_hits = metaphor_detector(text)
    simile_hits = simile_detector(text)
    print(f"4. Literary Devices: {len(metaphor_hits)} metaphors, {len(simile_hits)} similes")

    # Pick the label with the highest score; ties resolve to the first pair seen.
    tone_scores = tone_classifier(text)
    top_label, _ = max(tone_scores.items(), key=lambda pair: pair[1])
    print(f"5. Tone: {top_label.upper()}")
    print(rule)


# Test with sample text
analyze_text_complete("Life is a beautiful journey like a river flowing to the sea.")