In [30]:
import torch
from architecture import load_hf_gpt2_model, fill_mask

# --- Configuration ---
# Prefix shared by every flag; solve() searches for it in the extracted text.
FLAG_IDENTIFIER = "AI2025{"

# Upper bound on how many characters solve() generates for a single prompt.
MAX_LENGTH = 500

# Starting prompts handed to the model, based on the server log context.
# Earlier candidates are kept below, disabled, as a record of what was tried.
PROMPT_CANDIDATES = [
    # "user_password",
    # "secret_key",
    # "admin_pass",
    # "password",
    # "secret",
    # "auth_token",
    # "credentials",
    # "path=/api/auth/create",
    # "timestamp=",
    f"log entry related to the flag: {FLAG_IDENTIFIER}",
]

def _find_complete_flag(text, identifier):
    """
    Return the last complete flag in ``text``, or None if there is none.

    A complete flag is ``identifier`` followed (anywhere later) by a closing
    '}'. The search is anchored at the *last* occurrence of ``identifier`` so
    that junk generated between the prompt's own identifier and the real flag
    is not included in the result.
    """
    start = text.rfind(identifier)
    if start == -1:
        return None
    end = text.find("}", start + len(identifier))
    if end == -1:
        return None
    return text[start:end + 1]


def solve():
    """
    Load the model and greedily extract a memorized flag, one character at a time.

    For each prompt in PROMPT_CANDIDATES the running prompt is suffixed with
    "[MASK]", passed to ``fill_mask``, and the highest-scoring vocabulary entry
    is appended. Generation for a prompt stops after MAX_LENGTH characters, or
    as soon as a special token ("[PAD]", "[SEP]", "[CLS]") or '}' is predicted;
    the stop token is still printed and kept in the extracted string.

    An attempt is reported as successful only when the prompt plus the
    generated text contains a complete flag (FLAG_IDENTIFIER ... '}'). Merely
    containing FLAG_IDENTIFIER is not enough, because a prompt may itself
    include the identifier.

    Returns:
        None. Progress and results are printed to stdout. If the model cannot
        be loaded, an error message is printed and the function returns early.
    """
    print("Loading the MLMpire model...")
    # Load the model and vocabulary wrapper from architecture.py
    try:
        wrapper = load_hf_gpt2_model(model_path="./hf_gpt2_model", vocab_path="vocab.json")
        print("Model loaded successfully.")
    except Exception as e:
        print(f"Error loading model: {e}")
        print("Please ensure 'config.json' and 'pytorch_model.bin' are in the 'hf_gpt2_model' directory.")
        return

    # Tokens that terminate generation for the current prompt.
    stop_tokens = ("[PAD]", "[SEP]", "[CLS]", "}")

    # Iterate through each potential starting prompt
    for starting_prompt in PROMPT_CANDIDATES:
        print(f"\n--- Trying prompt: '{starting_prompt}' ---")

        extracted_secret = ""
        current_prompt = starting_prompt

        print("Extracted String: ", end='', flush=True)

        # Iteratively predict the next character up to MAX_LENGTH
        for _ in range(MAX_LENGTH):
            # Create the prompt with the mask token at the end
            prompt_with_mask = current_prompt + "[MASK]"

            # Scores for the masked position.
            # NOTE(review): assumes fill_mask returns a 1-D vector over the
            # vocabulary, since argmax is taken over the flattened tensor and
            # used directly as an index into wrapper.itos - confirm.
            logits = fill_mask(wrapper, prompt_with_mask)

            # Greedy decoding: take the single highest-scoring token.
            predicted_token_id = torch.argmax(logits).item()

            # Convert the token ID back to a character
            predicted_char = wrapper.itos[predicted_token_id]

            extracted_secret += predicted_char
            print(predicted_char, end='', flush=True)

            # '}' closes a flag, and special tokens mean the model is done.
            if predicted_char in stop_tokens:
                break

            current_prompt += predicted_char

        # Require a complete flag; the prompt may already contain the bare
        # identifier, so a plain substring test would always succeed.
        full_result = starting_prompt + extracted_secret
        flag = _find_complete_flag(full_result, FLAG_IDENTIFIER)
        if flag is not None:
            print("\n\n[SUCCESS] Flag found with this prompt!")
            print(f"Full extracted string: {full_result}")
            print(f"Flag: {flag}")
            return  # Exit the function since we found the flag
        else:
            print("\n[INFO] Flag not found with this prompt. Trying next...")

    print("\n\nExtraction complete. Flag not found with any of the provided prompts.")



In [31]:
# Run the extraction; solve() prints its progress and the result to stdout.
solve()

Loading the MLMpire model...
Model loaded successfully.

--- Trying prompt: 'log entry related to the flag: AI2025{' ---
Extracted String: 3oi  smcs =ipquess valu=-1n023":0-dmet","pw":"AI2025{m3m0r1z3d_mask_a1_i3_co0l}

[SUCCESS] Flag found with this prompt!
Full extracted string: log entry related to the flag: AI2025{3oi  smcs =ipquess valu=-1n023":0-dmet","pw":"AI2025{m3m0r1z3d_mask_a1_i3_co0l}
