In [1]:
# Mount Google Drive
# Makes the Drive contents available under /content/drive on the Colab VM.
from google.colab import drive
drive.mount('/content/drive')

# Navigate to project
# The backslash escapes the space in "Colab Notebooks". Later cells rely on this
# working directory: they import from `src.rl...` and use relative paths such as
# "./models/..." and "src/rl/...".
%cd /content/drive/MyDrive/Colab\ Notebooks/italian_teacher

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab Notebooks/italian_teacher


In [2]:
# Install dependencies
!pip install -q transformers trl accelerate peft datasets spacy sentence-transformers bitsandbytes
!python -m spacy download it_core_news_sm

Collecting it-core-news-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/it_core_news_sm-3.8.0/it_core_news_sm-3.8.0-py3-none-any.whl (13.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.0/13.0 MB[0m [31m33.8 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('it_core_news_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [3]:


import json
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import GRPOConfig, GRPOTrainer
from datasets import Dataset
from getpass import getpass

# Report the runtime: PyTorch build, CUDA availability and (if present) the GPU name.
cuda_ok = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Set OpenAI API key for full professional-grade reward function validation.
# The key is only prompted for (without echo) when the environment variable is
# absent, and is then exported so code reading os.environ can pick it up.
if "OPENAI_API_KEY" not in os.environ:
    OPENAI_API_KEY = getpass("Enter OpenAI API key: ")
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Summary of which validation steps are described as using the LLM.
for status_line in (
    "✅ OpenAI API enabled - Full professional validation (spaCy + LLM)",
    "   - Article validation: LLM",
    "   - Grammar backup: LLM for scores < 8.0",
    "   - Coherence checking: LLM",
):
    print(status_line)

PyTorch version: 2.8.0+cu126
CUDA available: True
GPU: NVIDIA L4
Enter OpenAI API key: ··········
✅ OpenAI API enabled - Full professional validation (spaCy + LLM)
   - Article validation: LLM
   - Grammar backup: LLM for scores < 8.0
   - Coherence checking: LLM


In [4]:
!pip install json5



In [5]:
from src.rl.reward_function import ExerciseRewardFunction

# Scorers to leave out of the reward computation for this evaluation run.
scorers_to_skip = ["fluency"]

# Build the reward function once; `reward_fn` is reused by the evaluation cell.
# Construction loads vocabulary + models, so this cell can take a while.
print("Loading reward function...")
reward_fn = ExerciseRewardFunction(disabled_scorers=scorers_to_skip)
print("✅ Reward function ready")

Loading reward function...
Loading spaCy model: it_core_news_sm...
✅ spaCy model loaded
Reward function will use device: cuda
Initializing scorers...
  ✅ LLM scoring enabled for cefr_alignment (batch size: 10)
  ✅ LLM scoring enabled for grammar_correctness (batch size: 10)
  ✅ LLM scoring enabled for coherence (batch size: 10)
Loading sentence transformer for topic similarity...
✅ Sentence transformer loaded in cuda
  ✅ LLM topic checking enabled (OpenAI API)
✅ Reward function initialized. Active scorers: ['json', 'quality', 'linguistic', 'cefr', 'grammar', 'coherence', 'topic']
   Disabled scorers: ['fluency']
✅ Reward function ready


In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# GRPO-trained checkpoint to evaluate.
# NOTE(review): the directory name says "round2", but this path was previously
# labelled as the Round 1 GRPO model (Oct 17) — confirm which checkpoint is intended.
MODEL_PATH = "./models/italian_v8_grpo_round2"

print(f"Loading model from: {MODEL_PATH}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Passing `load_in_8bit=True` directly to `from_pretrained` is deprecated; the
# 8-bit setting now goes through a BitsAndBytesConfig in `quantization_config`.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto",  # let accelerate place the weights on the available device(s)
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)
# Inference only: switch off training-time behaviour such as dropout.
model.eval()

print(f"✅ Model loaded on device: {next(model.parameters()).device}")

Loading model from: ./models/italian_v8_grpo_round2


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

✅ Model loaded on device: cuda:0


In [7]:
import json
import os

# Single source of truth for the cached validation set. Previously the existence
# check and the read used a misspelled file name ("validation_requestss.json")
# while the generator wrote "validation_requests.json", so the cache was never
# found and a fresh set of requests was generated on every run.
VALIDATION_REQUESTS_PATH = "src/rl/validation_requests.json"

# Load pre-generated validation requests
if os.path.exists(VALIDATION_REQUESTS_PATH):
    print("Loading existing validation requests...")
    with open(VALIDATION_REQUESTS_PATH, "r", encoding="utf-8") as f:
        validation_requests = json.load(f)
else:
    # If not exists, generate them (the generator also saves them to the given path)
    from src.rl.generate_training_requests import generate_training_requests
    print("Generating new validation requests...")
    validation_requests = generate_training_requests(
        num_requests=20,
        output_path=VALIDATION_REQUESTS_PATH,
    )

print(f"✅ Loaded {len(validation_requests)} validation requests")

Generating new validation requests...
Loading vocabulary...
✅ Loaded 16887 Italian words from vocabulary list
Generating 20 training requests...
✅ Saved 20 requests to src/rl/validation_requests.json

📊 Request Statistics:
By CEFR level:
  A1: 0
  A2: 1
  B1: 5
  B2: 8
  C1: 4
  C2: 2

Top 5 grammar focuses:
  conditional: 3
  past_tense: 2
  future_tense: 2
  prepositions: 1
  articles: 1
✅ Loaded 20 validation requests


In [8]:
from src.rl.iterative_training import evaluate_model_on_requests, create_round2_dataset
# After Round 1 training:
low, high = await evaluate_model_on_requests(model, tokenizer, reward_fn, validation_requests, batch_size=50, output_path='eval.json')


✅ Model is already quantized and mapped to devices.
✅ SentenceTransformer moved to CUDA for topic scoring.
📊 Evaluating 20 requests in batches of 50 (max_new_tokens=400)
Model device: cuda:0


Evaluating Batches:   0%|          | 0/1 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



✅ Evaluation complete. Processed: 20
  Low scoring (<92): 20
  High scoring (>=92): 0
  Avg score (processed): 41.62
💾 Final results saved to eval.json
