# Italian Grammar Analysis with Open Source LLMs

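This notebook regenerates the grammar explanations in an Italian-teaching chat dataset using open-source models that run on a Colab GPU: an instruction-tuned LLM (Qwen2.5-3B-Instruct by default) writes the explanation and a Helsinki-NLP Italian→English model supplies the translation.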

In [None]:
# Install required packages
# NOTE(review): versions are unpinned, so a fresh runtime may pull newer,
# behaviour-changing releases — consider pinning.
!pip install transformers torch accelerate sentencepiece datasets

# Install Flash Attention 2 for better performance (optional, will fallback if fails)
# The model-loading cell only requests Flash Attention when it detects a
# compatible GPU, and falls back to standard attention if loading with it fails.
!pip install flash-attn --no-build-isolation

# Mount Google Drive to access your dataset
# The dataset and output paths used later live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Collecting flash-attn
  Downloading flash_attn-2.8.3.tar.gz (8.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m56.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: flash-attn
  Building wheel for flash-attn (setup.py) ... [?25l[?25hdone
  Created wheel for flash-attn: filename=flash_attn-2.8.3-cp312-cp312-linux_x86_64.whl size=256040057 sha256=f25da18657a87fc83dc1bfb8b7751b82246e9db355510226b674fd437c34b5fb
  Stored in directory: /root/.cache/pip/wheels/3d/59/46/f282c12c73dd4bb3c2e3fe199f1a0d0f8cec06df0cccfeee27
Successfully built flash-attn
Installing collected packages: flash-attn
Successfully installed flash-attn-2.8.3
Mounted at /content/drive


In [None]:
import json
from pathlib import Path
import torch
from transformers import pipeline
from tqdm import tqdm
import re
import os
import time

# Report whether a CUDA device is visible and, if so, its name and total memory.
_cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {_cuda_ready}")
if _cuda_ready:
    _total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU: {torch.cuda.get_device_name()}")
    print(f"Memory: {_total_bytes / 1e9:.1f} GB")

CUDA available: True
GPU: Tesla T4
Memory: 15.8 GB


In [None]:
# Release cached GPU memory left over from earlier runs in this session
import torch

if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("🧹 Cleared GPU cache")

# Smoke test: allocate a tiny tensor on the active device. Any exception is
# reported together with a hint to restart the runtime.
# NOTE(review): this only proves a basic allocation works; it is presumably a
# proxy for a broken Flash Attention install rather than a direct check.
try:
    test_tensor = torch.randn(
        1, 1,
        device='cuda' if torch.cuda.is_available() else 'cpu',
    )
except Exception as e:
    print(f"⚠️ GPU state issue: {e}")
    print("💡 If you see Flash Attention errors, restart runtime: Runtime → Restart Runtime")
else:
    print("✅ GPU state clean")

🧹 Cleared GPU cache
✅ GPU state clean


In [None]:
# Load all required models: language model, tokenizer, and translator
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

print("🚀 Loading all models for Italian grammar analysis...")

# Flash Attention 2 needs an Ampere-or-newer GPU, i.e. CUDA compute capability
# >= 8.0. Ask the driver for the capability instead of matching product names,
# which silently misses parts such as H100, L4 or the RTX A-series.
cuda_available = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0) if cuda_available else "No GPU"
is_ampere_or_newer = cuda_available and torch.cuda.get_device_capability(0)[0] >= 8
print(f"🔍 GPU: {gpu_name}")
print(f"⚡ Flash Attention compatible: {is_ampere_or_newer}")


def _load_causal_lm(name, **extra_kwargs):
    """Load causal LM `name` in float16 with automatic device placement.

    Args:
        name: Hugging Face Hub model id.
        **extra_kwargs: Forwarded to `from_pretrained` (used here for
            `attn_implementation`).

    Returns:
        The loaded model.

    Raises:
        Whatever `from_pretrained` raises (download, memory or config errors).
    """
    # NOTE(review): trust_remote_code=True allows the Hub repository to run its
    # own Python code. Kept for parity with the original cell — confirm it is
    # actually required for these models.
    return AutoModelForCausalLM.from_pretrained(
        name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
        **extra_kwargs,
    )


# Load the main language model for grammar analysis
model_name = "Qwen/Qwen2.5-3B-Instruct"

try:
    print(f"Loading {model_name}...")
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    # Use Flash Attention only if GPU supports it
    if is_ampere_or_newer:
        try:
            print("🚀 Attempting Flash Attention 2...")
            model = _load_causal_lm(model_name, attn_implementation="flash_attention_2")
            print("✅ Using Flash Attention 2!")
        except Exception as flash_error:
            # e.g. the flash-attn wheel is missing or failed to build
            print(f"Flash Attention failed: {flash_error}")
            model = _load_causal_lm(model_name)
            print("📝 Using standard attention")
    else:
        print(f"📝 Using standard attention (Flash Attention 2 unavailable on: {gpu_name})")
        model = _load_causal_lm(model_name)

    print(f"✅ Model {model_name} loaded successfully!")

except Exception as e:
    print(f"Failed to load {model_name}: {e}")
    print("Trying backup model...")

    try:
        model_name = "microsoft/Phi-3-mini-4k-instruct"
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        model = _load_causal_lm(model_name)
        print(f"✅ Backup model {model_name} loaded!")

    except Exception as e2:
        print(f"Backup failed: {e2}")
        print("Using smallest fallback...")

        # Last resort: a failure here is intentionally left to propagate,
        # because nothing downstream can run without a language model.
        model_name = "Qwen/Qwen2-1.5B-Instruct"
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        model = _load_causal_lm(model_name)

# Load translation model. The half-precision GPU pipeline is attempted only
# when a GPU exists; otherwise (or if it fails) a default-precision CPU
# pipeline is used.
translator_model_name = "Helsinki-NLP/opus-mt-it-en"
translator = None
if cuda_available:
    try:
        translator = pipeline(
            "translation",
            model=translator_model_name,
            device="cuda:0",
            torch_dtype=torch.float16
        )
        print("✅ Translation model loaded on GPU")
    except Exception as trans_error:
        print(f"GPU translator failed: {trans_error}")
        translator = None

if translator is None:
    translator = pipeline(
        "translation",
        model=translator_model_name,
        device=-1
    )
    print("✅ Translation model loaded on CPU")

# Set pad token (padding is requested by every tokenizer call in this notebook)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("✅ All models loaded successfully!")
print(f"🎯 Using language model: {model_name}")
if cuda_available:
    # All CUDA queries are guarded so the cell also completes on a CPU runtime.
    print(f"💾 Total GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
    print(f"🔥 GPU Memory allocated: {torch.cuda.memory_allocated() / 1e9:.1f} GB")
    print(f"⚡ GPU Memory cached: {torch.cuda.memory_reserved() / 1e9:.1f} GB")

    # Clear cache and prepare for inference
    torch.cuda.empty_cache()
print("🧹 Ready for grammar analysis!")

🚀 Loading all models for Italian grammar analysis...
🔍 GPU: Tesla T4
⚡ Flash Attention compatible: False
Loading Qwen/Qwen2.5-3B-Instruct...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

📝 Using standard attention (T4 doesn't support Flash Attention)


config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

✅ Model Qwen/Qwen2.5-3B-Instruct loaded successfully!


config.json: 0.00B [00:00, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


pytorch_model.bin:   0%|          | 0.00/344M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/344M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/814k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/790k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


✅ Translation model loaded on GPU
✅ All models loaded successfully!
🎯 Using language model: Qwen/Qwen2.5-3B-Instruct
💾 Total GPU Memory: 15.8 GB
🔥 GPU Memory allocated: 6.4 GB
⚡ GPU Memory cached: 6.4 GB
🧹 Ready for grammar analysis!


In [None]:
def analyze_italian_grammar_with_llm(sentence: str) -> str:
    """Translate `sentence` and generate a grammar explanation (verbose debug variant).

    Uses the notebook-level `translator`, `tokenizer` and `model`. Prints the
    prompt/output token counts and the raw generation for inspection.

    Args:
        sentence: The Italian sentence to analyse.

    Returns:
        "This translates to '<translation>'. This sentence uses <explanation>"
        on success; only the translation sentence if generation fails; or
        "Analysis unavailable for: '<sentence>'" if translation fails as well.
        The function never raises.
    """
    # Remembered outside the try block so the fallback can reuse a translation
    # that already succeeded instead of running the translator a second time.
    english_translation = None

    try:
        # Get translation first
        english_translation = translator(sentence)[0]['translation_text']

        # The prompt ends mid-sentence so the model continues "This sentence uses ..."
        prompt = f"Explain the grammar of this Italian sentence: {sentence}\n\nThis sentence uses"

        # Tokenize
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=200)

        if torch.cuda.is_available():
            inputs = {k: v.to(model.device) for k, v in inputs.items()}

        print(f"🔢 Input tokens: {inputs['input_ids'].shape[1]} tokens")

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=170,  # More tokens for complete explanations
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
                top_p=0.9
            )

        print(f"🔢 Output tokens: {outputs.shape[1]} tokens")

        # generate() returns prompt + continuation; keep only the NEW tokens
        input_length = inputs['input_ids'].shape[1]
        generated_tokens = outputs[0][input_length:]
        grammar_explanation = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

        print(f"✨ Generated: '{grammar_explanation}'")

        # Minimal cleaning: keep the content, but make sure it ends on a sentence.
        if not grammar_explanation:
            grammar_explanation = "Italian grammatical patterns."
        elif len(grammar_explanation) < 10:
            # Too short to be a real explanation
            grammar_explanation = "Italian grammatical structures."
        elif not grammar_explanation.endswith(('.', '!', '?')):
            # Generation hit the token limit mid-sentence: cut back to the last
            # full stop, or close the text with one if there is none.
            last_period = grammar_explanation.rfind('.')
            if last_period > 0:
                grammar_explanation = grammar_explanation[:last_period + 1]
            else:
                grammar_explanation = grammar_explanation + "."

        return f"This translates to '{english_translation}'. This sentence uses {grammar_explanation}"

    except Exception as e:
        print(f"Error analyzing '{sentence}': {e}")
        # Fallback to translation only
        if english_translation is None:
            try:
                english_translation = translator(sentence)[0]['translation_text']
            except Exception:
                # `except Exception` (not a bare except) so that interrupting
                # the kernel still stops a long run.
                return f"Analysis unavailable for: '{sentence}'"
        return f"This translates to '{english_translation}'."

# Run the verbose analyzer over a few sample sentences and show each result
print("🧪 Testing with FULL LLM output (no truncation):")
print("=" * 60)

test_sentences = [
    "Ho obbedito.",
    "Se avessi tempo, viaggerei.",
    "Mostratemi la strada.",
    "Sono andato al mercato.",
]

for sentence in test_sentences:
    result = analyze_italian_grammar_with_llm(sentence)
    # One call, three lines: the sentence, its analysis, then a divider.
    print(f"\n📝 {sentence}", f"💡 {result}", "-" * 60, sep="\n")

🧪 Testing with FULL LLM output (no truncation):
🔢 Input tokens: 17 tokens
🔢 Output tokens: 187 tokens
✨ Generated: 'the simple present tense in the active voice. It translates to "I obeyed" in English. 

The key parts are:
- "Ho" is a contraction of "ho" (first-person singular form of 'to have') and the reflexive pronoun "me". In Italian, it's used with verbs that require an object but can be expressed as reflexive.
- "Obbedito" is the third-person singular past participle of "obbedire" which means "to obey." The verb conjugation follows the rules for Italian past participles formed from the infinitive.

So grammatically:
- Subject = "Io" (I)
- Verb = "Ho obbedito" - "have obeyed"
- Tense = Past
- Voice = Active

It doesn't contain any other grammatical elements like ad'

📝 Ho obbedito.
💡 This translates to 'I obeyed.'. This sentence uses the simple present tense in the active voice. It translates to "I obeyed" in English. 

The key parts are:
- "Ho" is a contraction of "ho" (first-per

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


🔢 Output tokens: 193 tokens
✨ Generated: 'conditional syntax in Italian. Here's a breakdown:

1. "Se" - This is a conjunction meaning "if".
2. "avessi" - This is the conditional past form (imperfetto) of avere (to have), and it refers to an unreal condition.
3. "tempo" - This means "time". It remains in its basic form because it's not changing.
4. "viaggiarei" - This is the conditional future form (futuro imparfaito) of the verb "viaggiare" (to travel). The "-rei" ending indicates that the action would be done by someone else or in general.

The sentence translates roughly to English as:
"If I had time, I would travel."

It expresses a wish or possibility for something to happen under certain conditions. Note that the'

📝 Se avessi tempo, viaggerei.
💡 This translates to 'If I had time, I'd travel.'. This sentence uses conditional syntax in Italian. Here's a breakdown:

1. "Se" - This is a conjunction meaning "if".
2. "avessi" - This is the conditional past form (imperfetto) of avere (t

In [None]:
# Dataset processing functions
def is_grammar_related_sample(messages):
    """Return True when the first message of a sample asks for grammar analysis.

    Args:
        messages: The sample's message list; only `messages[0]['content']` is
            inspected (case-insensitively).

    Returns:
        True if the content contains any grammar indicator phrase. False for an
        empty/None list, a first message that is not a dict, or content that is
        missing or not a string — malformed samples are treated as "not
        grammar" rather than raising, so one bad record cannot abort a long
        dataset run.
    """
    if not messages:
        return False

    first_message = messages[0]
    if not isinstance(first_message, dict):
        return False

    content = first_message.get('content')
    if not isinstance(content, str):
        # Covers both a missing key and an explicit `"content": null`
        return False

    user_message = content.lower()
    grammar_indicators = [
        'grammatical patterns', 'break down', 'explain this italian',
        'what does this mean', 'analyze', 'grammar', 'grammatical',
        'sentence structure', 'what grammatical elements',
        'what\'s happening grammatically', 'grammar structure',
        'could you explain the grammar', "grammatically"
    ]
    return any(indicator in user_message for indicator in grammar_indicators)

def extract_italian_sentence(user_message):
    """Extract the Italian sentence embedded in a user request.

    Strategy, in order:
      1. The first quoted span. Double quotes ("…", “…”, «…») are matched as
         pairs. Single quotes only count as delimiters when the opening quote
         does not follow a word character and the closing quote is not followed
         by one, so apostrophes inside words ("What's", "L'amico", "Dov'è")
         are not mistaken for quotation marks.
      2. The text after the first colon that follows the word "Italian"
         (e.g. "this Italian sentence: Ho fame").
      3. Otherwise, whatever follows the word "Italian".

    Args:
        user_message: The raw user message text.

    Returns:
        The extracted sentence with surrounding whitespace stripped, or None
        if nothing usable is found (including when `user_message` is not a
        string).
    """
    if not isinstance(user_message, str):
        return None

    quoted_pattern = (
        r'"([^"]+)"'                  # "straight double quotes"
        r'|“([^”]+)”'                 # “curly double quotes”
        r'|«([^»]+)»'                 # «guillemets»
        r"|(?<!\w)'(.+?)'(?!\w)"      # 'single quotes', ignoring in-word apostrophes
    )
    quote_match = re.search(quoted_pattern, user_message)
    if quote_match:
        # Exactly one alternative matched; pick its capture group.
        sentence = next(g for g in quote_match.groups() if g is not None).strip()
        if sentence:
            return sentence

    # Prefer text after an actual colon, then fall back to the looser
    # "anything after 'Italian'" rule.
    for pattern in (r"Italian[^:\n]*:\s*(.+)", r"Italian:?\s*(.+)"):
        colon_match = re.search(pattern, user_message, re.IGNORECASE)
        if colon_match:
            sentence = colon_match.group(1).strip()
            if sentence:
                return sentence

    return None




# Remove debug prints for faster processing
def analyze_italian_grammar_production(sentence: str) -> str:
    """Translate `sentence` and generate a grammar explanation (quiet variant).

    Uses the notebook-level `translator`, `tokenizer` and `model`, and prints
    nothing.

    Args:
        sentence: The Italian sentence to analyse.

    Returns:
        "This translates to '<translation>'. This sentence uses <explanation>"
        on success; only the translation sentence if generation fails; or
        "Analysis unavailable for: '<sentence>'" if translation fails as well.
        The function never raises.
    """
    # Remembered outside the try block so the fallback can reuse a translation
    # that already succeeded instead of running the translator a second time.
    english_translation = None

    try:
        english_translation = translator(sentence)[0]['translation_text']

        # The prompt ends mid-sentence so the model continues "This sentence uses ..."
        prompt = f"Explain the grammar of this Italian sentence: {sentence}\n\nThis sentence uses"
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=200)

        if torch.cuda.is_available():
            inputs = {k: v.to(model.device) for k, v in inputs.items()}

        with torch.no_grad():
            # `early_stopping` is intentionally not passed: with sampling the
            # library reports it as an invalid generation flag, so it had no
            # effect beyond a warning.
            outputs = model.generate(
                **inputs,
                max_new_tokens=170,        # Room for a complete explanation
                temperature=0.6,           # Slightly more focused than 0.7
                do_sample=True,            # Use sampling (not greedy)
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,    # Slightly reduce repetition
                top_p=0.85,                # Nucleus sampling threshold
                top_k=40                   # Also limit to the 40 most likely tokens
            )

        # generate() returns prompt + continuation; keep only the new tokens
        input_length = inputs['input_ids'].shape[1]
        generated_tokens = outputs[0][input_length:]
        grammar_explanation = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

        # Minimal cleaning - keep full output
        if grammar_explanation and len(grammar_explanation) >= 10:
            if not grammar_explanation.endswith(('.', '!', '?')):
                # Generation hit the token limit mid-sentence: cut back to the
                # last full stop, or close the text with one if there is none.
                last_period = grammar_explanation.rfind('.')
                if last_period > 0:
                    grammar_explanation = grammar_explanation[:last_period + 1]
                else:
                    grammar_explanation = grammar_explanation + "."
        else:
            grammar_explanation = "Italian grammatical patterns."

        return f"This translates to '{english_translation}'. This sentence uses {grammar_explanation}"

    except Exception:
        # Fallback to translation only
        if english_translation is None:
            try:
                english_translation = translator(sentence)[0]['translation_text']
            except Exception:
                # `except Exception` (not a bare except) so that interrupting
                # the kernel still stops a long run.
                return f"Analysis unavailable for: '{sentence}'"
        return f"This translates to '{english_translation}'."

# Dataset locations on the mounted Google Drive.
# DATA_PATH holds the input JSONL files; OUTPUT_PATH receives the rewritten
# samples and is created here if it does not exist yet.
DATA_PATH = "/content/drive/MyDrive/Colab Notebooks/italian_teacher/data/processed"
OUTPUT_PATH = "/content/drive/MyDrive/Colab Notebooks/italian_teacher/data/processed_llm_improved"
os.makedirs(OUTPUT_PATH, exist_ok=True)



In [None]:
# Optional: Quick test before full processing
# Run this cell first to test on a small sample

TEST_MODE = True  # Set to False to skip testing

if TEST_MODE:
    print("🧪 TESTING MODE: Processing first 50 samples from validation set")

    test_input = f"{DATA_PATH}/validation.jsonl"
    test_output = f"{OUTPUT_PATH}/test_sample.jsonl"

    if os.path.exists(test_input):
        # Read only the first 50 non-blank lines instead of loading the whole file
        test_lines = []
        with open(test_input, 'r', encoding='utf-8') as f:
            for line in f:
                if len(test_lines) >= 50:
                    break
                if line.strip():
                    test_lines.append(line)

        test_improved = []
        test_grammar_count = 0
        test_improved_count = 0

        print(f"Testing on {len(test_lines)} samples...")

        for line in tqdm(test_lines, desc="Test processing"):
            # A line that is not valid JSON has no sample to keep, so it is
            # skipped (previously the sample from the prior iteration was
            # appended a second time, or a NameError was raised on line one).
            try:
                sample = json.loads(line.strip())
            except json.JSONDecodeError as e:
                print(f"⚠️ Skipping malformed JSON line: {e}")
                continue

            # Default to passing the sample through unchanged; it is replaced
            # only when a new explanation was generated.
            output_sample = sample
            try:
                messages = sample.get('messages', [])

                if is_grammar_related_sample(messages) and len(messages) >= 2:
                    test_grammar_count += 1
                    user_msg = messages[0]['content']
                    italian_sentence = extract_italian_sentence(user_msg)

                    if italian_sentence:
                        new_explanation = analyze_italian_grammar_production(italian_sentence)
                        # Shallow copy is enough: 'messages' is rebound, not mutated
                        improved_sample = sample.copy()
                        improved_sample['messages'] = [
                            messages[0],
                            {"role": "assistant", "content": f"Great question! {new_explanation}"}
                        ]
                        output_sample = improved_sample
                        test_improved_count += 1
            except Exception as e:
                print(f"⚠️ Keeping original sample after error: {e}")
                output_sample = sample

            test_improved.append(output_sample)

        # Save test results
        with open(test_output, 'w', encoding='utf-8') as f:
            for sample in test_improved:
                f.write(json.dumps(sample, ensure_ascii=False) + '\n')

        # Guard against division by zero when the slice has no grammar samples
        test_success_rate = (
            test_improved_count / test_grammar_count * 100 if test_grammar_count > 0 else 0
        )

        print(f"\n✅ Test complete!")
        print(f"📊 Test results:")
        print(f"  - Grammar samples found: {test_grammar_count}")
        print(f"  - Successfully improved: {test_improved_count}")
        print(f"  - Success rate: {test_success_rate:.1f}%")
        print(f"  - Test output: {test_output}")

        if test_improved_count > 0:
            print(f"\n🎯 Test successful! Ready for full dataset processing.")
            print(f"💡 Set TEST_MODE = False in the cell above and run cell 7 for full processing.")
        else:
            print(f"\n❌ Test failed - no samples were improved. Check the setup.")
    else:
        print(f"❌ Test input file not found: {test_input}")
else:
    print("💡 Test mode disabled. Run cell 7 for full dataset processing.")

🧪 TESTING MODE: Processing first 50 samples from validation set
Testing on 50 samples...


Test processing:   0%|          | 0/50 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Test processing: 100%|██████████| 50/50 [03:21<00:00,  4.04s/it]


✅ Test complete!
📊 Test results:
  - Grammar samples found: 23
  - Successfully improved: 21
  - Success rate: 91.3%
  - Test output: /content/drive/MyDrive/Colab Notebooks/italian_teacher/data/processed_llm_improved/test_sample.jsonl

🎯 Test successful! Ready for full dataset processing.
💡 Set TEST_MODE = False in the cell above and run cell 7 for full processing.





In [24]:
# Cell 8: Parallel Batch Processing for Full Dataset (Fixed for messages format)

import json
import torch
from tqdm import tqdm
from pathlib import Path

def process_batch_parallel(sentences_batch):
    """Generate grammar explanations for several sentences in one `generate` call.

    Uses the notebook-level `tokenizer` and `model`.

    Args:
        sentences_batch: List of Italian sentences.

    Returns:
        A list of stripped continuation strings, one per input sentence and in
        the same order. Returns [] for an empty batch, and a list of empty
        strings (one per sentence) if anything in the batch fails — the error
        is printed, never raised.
    """
    if not sentences_batch:
        return []

    try:
        # Each prompt ends mid-sentence so the model continues "This sentence uses ..."
        prompts = [
            f"Explain the grammar of this Italian sentence: {sentence}\n\nThis sentence uses"
            for sentence in sentences_batch
        ]

        # A decoder-only model continues from the last position of every row,
        # so shorter prompts must be padded on the LEFT; otherwise the model
        # would be asked to continue after a run of pad tokens. The tokenizer's
        # previous setting is restored right after tokenizing.
        previous_padding_side = tokenizer.padding_side
        tokenizer.padding_side = "left"
        try:
            inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True, max_length=512)
        finally:
            tokenizer.padding_side = previous_padding_side

        if torch.cuda.is_available():
            inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Pad finished rows with the tokenizer's own pad token (falling back to
        # EOS only if none is configured), matching what was used for the inputs.
        pad_token_id = tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = tokenizer.eos_token_id

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=170,
                temperature=0.6,
                top_p=0.85,
                do_sample=True,
                pad_token_id=pad_token_id
            )

        # Every output row is [padded prompt | new tokens], so the new tokens
        # start at the PADDED prompt width for all rows. Slicing at each
        # prompt's own unpadded length (as before) picked the wrong span for
        # every prompt shorter than the longest one in the batch.
        prompt_width = inputs['input_ids'].shape[1]
        decoded = tokenizer.batch_decode(outputs[:, prompt_width:], skip_special_tokens=True)
        return [text.strip() for text in decoded]

    except Exception as e:
        print(f"Error processing batch: {e}")
        return [""] * len(sentences_batch)

# Configuration
# Number of dataset samples handled per outer step. All grammar sentences found
# in one step are sent to the language model in a single generate() call, so
# lower this value if the GPU runs out of memory.
batch_size = 512
output_dir = Path('/content/drive/MyDrive/Colab Notebooks/italian_teacher/data/processed_llm_improved')
output_dir.mkdir(parents=True, exist_ok=True)

# Dataset files to process (validation is currently excluded)
dataset_files = [
    '/content/drive/MyDrive/Colab Notebooks/italian_teacher/data/processed/train.jsonl',
    # '/content/drive/MyDrive/Colab Notebooks/italian_teacher/data/processed/validation.jsonl'
    '/content/drive/MyDrive/Colab Notebooks/italian_teacher/data/processed/test.jsonl'
]

print(f"🚀 PARALLEL BATCH PROCESSING: Processing with batch size {batch_size}")

# Process each file
for file_path in dataset_files:
    file_name = Path(file_path).name
    output_file = output_dir / f"{file_name.replace('.jsonl', '_improved.jsonl')}"

    print(f"\n📁 Processing {file_name}...")

    # Load samples (blank lines are skipped so a trailing newline cannot crash the run)
    samples = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if line.strip():
                samples.append(json.loads(line))

    print(f"📊 Loaded {len(samples)} samples from {file_name}")

    # Initialize counters
    improved_count = 0
    total_grammar_samples = 0

    # Truncate the output file; batches are appended to it below
    with open(output_file, 'w', encoding='utf-8') as f:
        pass

    # Process in parallel batches
    for i in tqdm(range(0, len(samples), batch_size), desc=f"Processing {file_name} in parallel batches"):
        batch = samples[i:i+batch_size]

        # Collect the position and sentence of every grammar sample in this batch
        grammar_indices = []
        italian_sentences = []

        for j, sample in enumerate(batch):
            messages = sample.get('messages', [])
            if is_grammar_related_sample(messages) and len(messages) >= 2:
                total_grammar_samples += 1
                user_msg = messages[0]['content']
                italian_sentence = extract_italian_sentence(user_msg)

                if italian_sentence:
                    grammar_indices.append(j)
                    italian_sentences.append(italian_sentence)

        # Process batch in parallel if we have sentences
        if italian_sentences:
            # Get translations first. These are translated one sentence at a
            # time so a single failure only affects that sentence.
            translations = []
            for sentence in italian_sentences:
                try:
                    translation_result = translator(sentence)
                    translations.append(translation_result[0]['translation_text'])
                except Exception:
                    # `except Exception` (not a bare except) so that
                    # interrupting the kernel still stops the run.
                    translations.append("translation unavailable")

            # Get grammar explanations (one generate() call for the whole list)
            improved_grammars = process_batch_parallel(italian_sentences)

            # Apply results back to batch
            for idx, improved_grammar, translation in zip(grammar_indices, improved_grammars, translations):
                if improved_grammar and len(improved_grammar) > 20:  # Quality check
                    # Shallow copy is enough: 'messages' is rebound, not mutated
                    original_sample = batch[idx]
                    improved_sample = original_sample.copy()

                    # Make sure the explanation ends on a complete sentence
                    if not improved_grammar.endswith(('.', '!', '?')):
                        last_period = improved_grammar.rfind('.')
                        if last_period > 0:
                            improved_grammar = improved_grammar[:last_period + 1]
                        else:
                            improved_grammar = improved_grammar + "."

                    # Format the complete response
                    full_response = f"Great question! This translates to '{translation}'. This sentence uses {improved_grammar}"

                    improved_sample['messages'] = [
                        original_sample['messages'][0],
                        {"role": "assistant", "content": full_response}
                    ]
                    batch[idx] = improved_sample
                    improved_count += 1

        # Save batch immediately so finished work survives an interrupted run.
        # Samples that were not improved are written through unchanged.
        with open(output_file, 'a', encoding='utf-8') as f:
            for sample in batch:
                f.write(json.dumps(sample, ensure_ascii=False) + '\n')

    # Print results for this file
    success_rate = (improved_count / total_grammar_samples * 100) if total_grammar_samples > 0 else 0
    print(f"✅ {file_name} complete:")
    print(f"   📈 Grammar samples: {total_grammar_samples}")
    print(f"   🔧 Successfully improved: {improved_count}")
    print(f"   📊 Success rate: {success_rate:.1f}%")
    print(f"   💾 Saved to: {output_file}")

print(f"\n🎉 PARALLEL PROCESSING COMPLETE!")
print(f"📁 All improved files saved to: {output_dir}")
print(f"⚡ Processing completed with 2x GPU utilization and ~2x speed improvement!")

🚀 PARALLEL BATCH PROCESSING: Processing with batch size 512
💾 GPU Memory Utilization: ~12GB (2x improvement)
⚡ Expected Speed: ~2x faster than single processing

📁 Processing train.jsonl...
📊 Loaded 8104 samples from train.jsonl


Processing train.jsonl in parallel batches: 100%|██████████| 16/16 [17:21<00:00, 65.09s/it]


✅ train.jsonl complete:
   📈 Grammar samples: 3248
   🔧 Successfully improved: 3033
   📊 Success rate: 93.4%
   💾 Saved to: /content/drive/MyDrive/Colab Notebooks/italian_teacher/data/processed_llm_improved/train_improved.jsonl

📁 Processing test.jsonl...
📊 Loaded 507 samples from test.jsonl


Processing test.jsonl in parallel batches: 100%|██████████| 1/1 [01:01<00:00, 61.89s/it]

✅ test.jsonl complete:
   📈 Grammar samples: 196
   🔧 Successfully improved: 181
   📊 Success rate: 92.3%
   💾 Saved to: /content/drive/MyDrive/Colab Notebooks/italian_teacher/data/processed_llm_improved/test_improved.jsonl

🎉 PARALLEL PROCESSING COMPLETE!
📁 All improved files saved to: /content/drive/MyDrive/Colab Notebooks/italian_teacher/data/processed_llm_improved
⚡ Processing completed with 2x GPU utilization and ~2x speed improvement!



