<a href="https://colab.research.google.com/github/Sugaminni/Ai-Programming-Assignment-1/blob/master/Case_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Objective for Case 3


To demonstrate how generative AI can be used to create and detect deepfake phishing content, highlighting the risks and defenses surrounding AI-generated social-engineering attacks.

In [None]:
# Imports needed for Case 3

# Runtime packages
!pip install transformers torch ipywidgets

from transformers import AutoModelForCausalLM, AutoTokenizer # Model + Tokenizer for Qwen
import torch # Device / tensor utils
import re # Text cleaning
import random # Generating dataset variations
import ipywidgets as widgets # Interactive UI
from IPython.display import display, Markdown # Display for outputs

In [None]:
# Pick the compute device once: "cuda" when PyTorch reports a usable GPU,
# otherwise fall back to "cpu". Later cells read `device`.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Running on {device}")

In [None]:
# Language model used for generation (Hugging Face model id)
gen_model_id = "Qwen/Qwen2.5-3B"

# Tokenizer: turns text into input ids
tokenizer = AutoTokenizer.from_pretrained(gen_model_id)

# Model: generates new tokens given the prompt
model = AutoModelForCausalLM.from_pretrained(gen_model_id)

# Place the model on the selected device (GPU if available, for faster generation)
model = model.to(device)

In [None]:
# Prompt templates: each entry is a str.format-style prompt that defines what the
# model is asked to produce (email/message). Placeholders in braces are filled in
# by the caller before generation.
TEMPLATES = dict(
    # Needs: company_name, employee_name
    impersonation_email=(
        "You are impersonating the HR manager at {company_name}. "
        "Write a short, professional email to {employee_name} "
        "asking them to upload their W-4 form to the secure company portal. "
        "Make the tone urgent but plausible."
    ),
    # Needs: company_name, employee_name, amount, account_info
    urgent_transfer=(
        "You are the CEO of {company_name}. "
        "Write an email to the finance manager {employee_name} requesting an "
        "immediate wire transfer of ${amount} to vendor account {account_info} "
        "due to a time-sensitive contract."
    ),
    # Needs: employee_name
    credential_request=(
        "You are sending a password reset email. "
        "Write an email that convinces the user {employee_name} to click a link "
        "and enter their credentials on a page that looks like the company login."
    ),
)

## Attack Mode

In [None]:
# Defines how the model produces the phishing text (These are the attacks the Qwen guard will defend against)

def generate_texst(prompt,
                   max_new_tokens=150,
                   temperature=0.7,
                   top_p=0.9,
                   do_sample=True,
                   repetition_penalty=1.05,
                   stop_sequences=None,
                   *,
                   reptition_penalty=None):
    """
    Generate text from the module-level `model` / `tokenizer` with controllable sampling.

    Parameters
    - prompt: str, the input prompt (already formatted).
    - max_new_tokens: maximum number of tokens to generate (controls length).
    - temperature: float; 0.0 (or None) = deterministic / greedy, >0 more random/creative.
    - top_p: keep smallest set of tokens whose cumulative prob >= top_p
      (only used when sampling).
    - do_sample: if False = greedy decoding (deterministic), else sampling.
    - repetition_penalty: >1 discourages repeating same tokens/phrases.
    - stop_sequences: list of strings (a single string is also accepted); the output is
      cut at the first occurrence of each one (post-processed, applied before
      whitespace normalization). Empty strings are ignored.
    - reptition_penalty: keyword-only, deprecated misspelling of `repetition_penalty`
      kept so calls written against the old signature keep working; when given it
      overrides `repetition_penalty`.

    Returns
    - str: only the newly generated text (prompt removed), with every run of
      whitespace collapsed to a single space and leading/trailing whitespace stripped.
    """
    # Honor the legacy misspelled keyword if a caller still passes it.
    if reptition_penalty is not None:
        repetition_penalty = reptition_penalty

    # Tokenize and move to device
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    prompt_len = inputs["input_ids"].shape[-1]

    generate_kwargs = {
        "max_new_tokens": max_new_tokens,
        "repetition_penalty": repetition_penalty,
        "pad_token_id": tokenizer.eos_token_id,  # avoid warnings about padding
    }

    # Sampling needs a strictly positive temperature; temperature 0.0 is documented
    # as "greedy", so fall back to greedy decoding instead of passing an invalid value.
    use_sampling = bool(do_sample) and temperature is not None and temperature > 0
    if use_sampling:
        generate_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
    else:
        # temperature / top_p are sampling-only options, so they are not forwarded here.
        generate_kwargs["do_sample"] = False

    out_ids = model.generate(**inputs, **generate_kwargs)

    # Drop the prompt by slicing off its tokens. Comparing decoded text against the
    # prompt string is fragile: decoding does not always reproduce the prompt
    # character-for-character, in which case the prompt would leak into the result.
    new_token_ids = out_ids[0][prompt_len:]
    generated = tokenizer.decode(new_token_ids, skip_special_tokens=True)

    # Post-process: apply stop sequences, then normalize whitespace
    if isinstance(stop_sequences, str):
        stop_sequences = [stop_sequences]
    for stop in stop_sequences or ():
        if not stop:
            # "" is found at index 0 and would erase the whole output.
            continue
        cut_at = generated.find(stop)
        if cut_at != -1:
            generated = generated[:cut_at]

    return re.sub(r"\s+", " ", generated).strip()


# Correctly spelled alias; the original (misspelled) name stays available.
generate_text = generate_texst

Defense Mode (Qwen Guard)