
# Structural Factor Analysis of a Benchmark for Over-Refusal Behavior Across Various LLMs

## Model 1: Gemma3-4b

## Model 2: Llama3.1-8b

## Model 3: Qwen3-4b

## Model 4: Gemini-2.5-flash

## Model 5: Deepseek-V3.2

In [1]:
import importlib
import re
import math
import os
import torch
import numpy as np
import pandas as pd
import altair as alt
from statistics import mean
from collections import Counter
from tqdm import tqdm
from functools import reduce
from pathlib import Path
import gc
from typing import List, Tuple, Dict, Any

# --- GLOBAL CONFIGURATION ---
# Maps each model's file-name slug (key) to its display name (value).
# The slug is interpolated into the input CSV name, the feature-cache file name
# and the saved chart file name; the display name appears in log messages and
# in the chart title.
MODEL_MAPPING: Dict[str, str] = {
    "deepseekv32": "deepseek-v3.2",
    "llama318b": "llama3.1-8b",
    "qwen34b": "qwen3-4b",
    "gemini25flash": "gemini-2.5-flash",
    "gemma34b": "gemma3-4b"
}

# Parent directory where the per-model labelled CSV files are located
# (a relative path, so it is resolved against the current working directory).
BASE_DATA_DIR = "../data/label_fusion"

# NOTE(review): not referenced anywhere in the visible code - confirm it is still needed.
MODEL_NAME_FALLBACK = "Unknown-LLM"
# --- END GLOBAL CONFIGURATION ---


# --- INITIALIZATION AND UTILITY FUNCTIONS ---

def _imp(name):
    """Safely import a module, prompting for installation if it fails."""
    try:
        return importlib.import_module(name)
    except Exception as e:
        print(f"[WARN]: Please ensure to pip install {name}")
        raise

# Initialize necessary libraries.
# pandas / numpy / altair are already imported at the top of the file; they are
# re-resolved here so that a missing package produces the install hint from _imp.
try:
    stanza = _imp("stanza")
    pd = _imp("pandas")
    np = _imp("numpy")
    alt = _imp("altair")
except Exception:
    print("[ERROR]: Critical dependencies (stanza, pandas, numpy, altair) are missing. Please install them.")
    # Stop here: continuing without these names only defers the failure to a
    # less informative NameError further down.
    raise

# Altair Configuration
# Remove Altair's row-count limit on embedded data and select the "default" renderer.
alt.data_transformers.disable_max_rows()
alt.renderers.enable("default")

# Check GPU availability and configure device.
# USE_GPU / DEVICE are forwarded to every Stanza pipeline built by get_nlp().
USE_GPU = torch.cuda.is_available()
DEVICE = 'cuda' if USE_GPU else 'cpu'
print(f"[INFO]: GPU Acceleration Status: {'Enabled' if USE_GPU else 'Disabled'} (Device: {DEVICE})")

# Stanza NLP Pipeline cache: language code -> loaded pipeline.
# Filled by get_nlp() and emptied per language by release_nlp().
_NLP_CACHE: Dict[str, Any] = {}

def get_nlp(lang_code: str):
    """Return the Stanza pipeline for ``lang_code``, creating and caching it on first use.

    The pipeline runs the tokenize, pos, lemma and depparse processors on the
    device selected by ``USE_GPU`` / ``DEVICE``. If construction fails for any
    reason, the language resources are downloaded and construction is retried
    once; a failure of the retry propagates to the caller.

    Args:
        lang_code: Stanza language code (this file uses "en" and "zh").

    Returns:
        The pipeline instance stored in ``_NLP_CACHE`` under ``lang_code``.
    """
    if lang_code in _NLP_CACHE:
        return _NLP_CACHE[lang_code]

    def _build_pipeline():
        # Single place that defines the pipeline configuration.
        return stanza.Pipeline(
            lang_code,
            processors='tokenize,pos,lemma,depparse',
            tokenize_no_ssplit=False,
            use_gpu=USE_GPU,
            device=DEVICE
        )

    print(f"[INFO]: Loading Stanza Pipeline for language '{lang_code}'...")
    try:
        pipeline = _build_pipeline()
    except Exception:
        # First attempt failed: fetch the language resources, then retry once.
        print(f"[INFO]: Downloading language model for '{lang_code}'...")
        stanza.download(lang_code)
        pipeline = _build_pipeline()

    _NLP_CACHE[lang_code] = pipeline
    return pipeline

def release_nlp(lang_code: str):
    """Drop the cached Stanza pipeline for ``lang_code`` and reclaim memory.

    Removes the cache entry, runs the garbage collector and, when a GPU is in
    use, asks PyTorch to empty its CUDA cache. A failure while emptying the
    CUDA cache is reported and otherwise ignored.

    Args:
        lang_code: Language code whose pipeline should be released.
    """
    if lang_code not in _NLP_CACHE:
        print(f"[INFO]: Stanza Pipeline for '{lang_code}' was not found in cache.")
        return

    print(f"[INFO]: Releasing Stanza Pipeline for language '{lang_code}' and clearing GPU memory...")
    del _NLP_CACHE[lang_code]

    gc.collect()

    gpu_active = USE_GPU and torch.cuda.is_available()
    if not gpu_active:
        return

    try:
        torch.cuda.empty_cache()
        print("[INFO]: PyTorch CUDA cache cleared.")
    except Exception as e:
        print(f"[WARN]: Failed to clear CUDA cache: {e}")

# --- LINGUISTIC FEATURE EXTRACTION FUNCTIONS ---
# Punctuation counted as "complex": full-width semicolon (U+FF1B), full-width
# colon (U+FF1A), em dash (U+2014) and horizontal ellipsis (U+2026).
# Written as escapes so the character class survives any re-encoding of this
# file; a mis-decoded literal silently matches unrelated Latin-1 characters.
CN_COMPLEX_PUNCT = re.compile("[\uFF1B\uFF1A\u2014\u2026]")
# Dependency relation labels counted by compute_sub_clause_count().
SUBORDINATE_TAGS = {"mark", "advcl", "acl", "ccomp", "xcomp", "dep", "parataxis"}

def count_complex_punct(text: str) -> int:
    """Return how many characters of ``text`` belong to ``CN_COMPLEX_PUNCT``.

    Each character is counted individually, so a doubled em dash counts as two.
    ``text`` is passed through ``str()`` first, so non-string input is accepted.
    """
    return len(CN_COMPLEX_PUNCT.findall(str(text)))

def unigram_entropy(tokens):
    """Return the unigram entropy (natural log) of ``tokens``.

    Each distinct token contributes ``-p * log(p + 1e-12)``, where ``p`` is its
    relative frequency. An empty token sequence yields ``0.0``.
    """
    if not tokens:
        return 0.0

    total = len(tokens)
    frequencies = Counter(tokens).values()
    weighted_logs = ((count / total) * math.log((count / total) + 1e-12) for count in frequencies)
    return float(-sum(weighted_logs))

def type_token_ratio(tokens):
    """Return distinct-token count divided by total token count (0.0 when empty)."""
    if not tokens:
        return 0.0
    distinct = set(tokens)
    return len(distinct) / len(tokens)

def compute_dep_tree_depth(sent):
    """Return the depth of the dependency tree of ``sent``.

    Words whose head is 0 sit at depth 1 and each dependent is one level
    deeper than its head. Words whose head cannot be converted to ``int`` are
    ignored. When no word is attached to head 0 the result is 1.
    """
    # head id -> ids of the words that depend on it
    dependents = {}
    for word in sent.words:
        try:
            governor = int(word.head)
        except (ValueError, TypeError):
            continue
        dependents.setdefault(governor, []).append(word.id)

    def deepest_below(node_id, level):
        # A node without dependents is a leaf: its own level is the answer.
        if node_id not in dependents:
            return level
        return max(deepest_below(kid, level + 1) for kid in dependents[node_id])

    top_level = dependents.get(0, [])
    if not top_level:
        return 1
    return max(deepest_below(root_id, 1) for root_id in top_level)

def compute_dep_distance_mean(sent):
    """Return the mean absolute distance between each word and its head.

    Words attached to head 0 are excluded, as are words whose ``id`` or
    ``head`` is missing or not convertible to ``int``. Returns ``0.0`` when the
    sentence has no words or no measurable dependency.
    """
    words = sent.words
    if not words:
        return 0.0

    distances = []
    for word in words:
        if word.head is None or word.id is None:
            continue
        try:
            head_pos = int(word.head)
            word_pos = int(word.id)
        except (ValueError, TypeError):
            continue
        if head_pos != 0:
            distances.append(abs(word_pos - head_pos))

    if not distances:
        return 0.0
    return mean(distances)

def compute_sub_clause_count(sent):
    """Count the words of ``sent`` whose dependency relation is in ``SUBORDINATE_TAGS``.

    The relation label is compared case-insensitively; a missing label is
    treated as an empty string.
    """
    relations = ((word.deprel or '').lower() for word in sent.words)
    return sum(relation in SUBORDINATE_TAGS for relation in relations)

def stanza_features_for_text(text: str, nlp):
    """Compute the structural and lexical features of ``text``.

    Args:
        text: Input text; ``None`` and other falsy values are treated as empty,
            and surrounding whitespace is stripped.
        nlp: Callable that returns a parsed document exposing ``.sentences``,
            each with ``.words`` (as produced by ``get_nlp``).

    Returns:
        A dict with the keys ``character_len``, ``sentence_count``,
        ``token_len``, ``dep_depth_mean``, ``dep_distance_mean``,
        ``sub_clause_count``, ``punct_complex_count``, ``type_token_ratio`` and
        ``lexical_information_entropy``. All values are zero for empty text,
        in which case ``nlp`` is not called.
    """
    text = str(text or "").strip()
    if not text:
        return {
            "character_len": 0, "sentence_count": 0, "token_len": 0,
            "dep_depth_mean": 0.0, "dep_distance_mean": 0.0,
            "sub_clause_count": 0, "punct_complex_count": 0,
            "type_token_ratio": 0.0, "lexical_information_entropy": 0.0
        }

    sentences = nlp(text).sentences
    tokens = [word.text for sentence in sentences for word in sentence.words]

    # Per-sentence measurements, averaged over the document; a document with no
    # sentences averages to zero.
    if sentences:
        depth_avg = mean([compute_dep_tree_depth(sentence) for sentence in sentences])
        distance_avg = mean([compute_dep_distance_mean(sentence) for sentence in sentences])
    else:
        depth_avg = 0
        distance_avg = 0.0

    features = {}
    features["character_len"] = len(text)
    features["sentence_count"] = len(sentences)
    features["token_len"] = len(tokens)
    features["dep_depth_mean"] = float(depth_avg)
    features["dep_distance_mean"] = float(distance_avg)
    features["sub_clause_count"] = int(sum(compute_sub_clause_count(sentence) for sentence in sentences))
    features["punct_complex_count"] = int(count_complex_punct(text))
    features["type_token_ratio"] = float(type_token_ratio(tokens))
    features["lexical_information_entropy"] = float(unigram_entropy(tokens))
    return features
# --- END LINGUISTIC FEATURE EXTRACTION FUNCTIONS ---

# --- PLOTTING UTILITY FUNCTIONS ---

BINS = 20

def _domain_x(arr):
    if len(arr) == 0: return None
    min_val, max_val = np.nanmin(arr), np.nanmax(arr)
    padding = (max_val - min_val) * 0.05
    return [min_val - padding, max_val + padding]

def _domain_y_max(vals_list, bins):
    if not vals_list or all(v.size == 0 for v in vals_list): return [0, 10]
    max_count = 0
    for vals in vals_list:
        if vals.size > 0:
            hist, _ = np.histogram(vals, bins=bins, range=(np.nanmin(vals), np.nanmax(vals)))
            max_count = max(max_count, np.max(hist))

    return [0, int(max_count * 1.8) + 1]


def layered_hist_with_labels(df, label_col, x_field, title, bins, width, height, x_extent=None, y_domain=None):
    """
    Build a histogram of ``x_field`` for the rows labelled 'refuse', with a mean marker.

    Args:
        df: Data frame holding both ``x_field`` and ``label_col``.
        label_col: Column whose value must equal 'refuse' for a row to be plotted.
        x_field: Numeric column to histogram.
        title: Chart title; " (Refusal Prompts Only)" is appended to it.
        bins: Desired number of bins.
        width: Chart width in pixels.
        height: Chart height in pixels.
        x_extent: Optional ``[low, high]`` used as both bin extent and x-axis domain.
        y_domain: Optional ``[low, high]`` domain for the count axis.

    Returns:
        A layered Altair chart (red bars plus a dashed black rule at the mean of
        the plotted rows), or a text-only placeholder chart when a column is
        missing or no row is labelled 'refuse'.
    """
    if x_field not in df.columns or label_col not in df.columns:
        return alt.Chart(pd.DataFrame()).mark_text(text=f"Data Missing: {x_field}").properties(title=title, width=width, height=height)

    # Key behaviour: only the 'refuse' rows are plotted.
    df_filtered = df[df[label_col] == 'refuse'].copy()

    if df_filtered.empty:
        return alt.Chart(pd.DataFrame()).mark_text(text=f"No Refusal Data for: {x_field}").properties(title=title, width=width, height=height)

    # Forward optional settings only when they were supplied: an explicit None
    # is emitted as `null` in the chart specification instead of being left out.
    x_scale_kwargs = {} if x_extent is None else {"domain": x_extent}
    y_scale_kwargs = {} if y_domain is None else {"domain": y_domain}

    bin_kwargs = {}
    if x_extent is not None:
        bin_kwargs["extent"] = x_extent
        span = x_extent[1] - x_extent[0]
        # A fixed step is only meaningful for a positive span and bin count.
        if bins and span > 0:
            bin_kwargs["step"] = span / bins
    if "step" not in bin_kwargs and bins:
        # No usable extent: let the binning pick edges, capped at `bins` bins.
        bin_kwargs["maxbins"] = bins

    base = alt.Chart(df_filtered).properties(title=title + " (Refusal Prompts Only)")

    # Draw the histogram with a fixed colour.
    histogram = base.mark_bar(opacity=0.8, color='red').encode(
        x=alt.X(x_field, bin=alt.Bin(**bin_kwargs), title=x_field, scale=alt.Scale(**x_scale_kwargs)),
        y=alt.Y('count()', title='Refusal Count', scale=alt.Scale(**y_scale_kwargs)),
        tooltip=[alt.Tooltip('count()', title='Refusal Count')]
    )

    # The mean is computed over the 'refuse' rows only.
    mean_refuse = df_filtered[x_field].mean()

    # Dashed vertical rule marking that mean.
    rule_refuse = alt.Chart(pd.DataFrame({'mean': [mean_refuse]})).mark_rule(color='black', strokeDash=[5, 5]).encode(
        x=alt.X('mean:Q', scale=alt.Scale(**x_scale_kwargs)),
        tooltip=[alt.Tooltip('mean', format='.2f', title='Mean (Refuse)')]
    )

    return (histogram + rule_refuse).properties(width=width, height=height)

# --- CORE PROCESSING FUNCTION ---
def process_single_model(model_slug: str, model_name: str, base_dir: str):
    """
    Processes a single model's data, extracts features (with caching and GPU cleanup), and generates plots.

    Steps:
      1. Read ``test_{model_slug}_on_local_data_results_labeled.csv`` from ``base_dir``.
      2. Locate the text columns (English / Chinese / Mixed) and normalise the
         ``Final_Label_*`` columns to lower-case, stripped strings.
      3. Load per-row features from ``{model_slug}_features_cache.csv`` in the
         current directory when that file has the expected columns and row
         count; otherwise compute them with Stanza and write the cache.
      4. Build two rows of histograms (dependency depth, lexical entropy), one
         chart per language variant.

    Args:
        model_slug: File-name slug used to build the input, cache and output names.
        model_name: Display name used in log messages and in the chart title.
        base_dir: Directory containing the labelled CSV file.

    Returns:
        The combined Altair chart, or ``None`` when the CSV file is missing or
        none of the expected text columns is present.
    """
    print(f"\n==================== Starting Analysis for Model: {model_name} (Slug: {model_slug}) ====================")

    # 1. Dynamic Path Determination
    file_name = f"test_{model_slug}_on_local_data_results_labeled.csv"
    CSV_PATH = Path(base_dir) / file_name

    if not CSV_PATH.exists():
        print(f"[ERROR]: Data file not found for {model_name}: {CSV_PATH}")
        return

    # The cache path points at the current working directory
    CACHE_PATH = Path(f"{model_slug}_features_cache.csv")

    # 2. Data Loading and Preprocessing
    df = pd.read_csv(CSV_PATH)
    df.columns = [c.strip() for c in df.columns]

    def find_col(suffix_regex):
        # Return the first column name matching the regex (case-insensitive), or None.
        for c in df.columns:
            if re.search(suffix_regex, c, flags=re.I): return c
        return None

    # Dynamically find result columns (anchored with ^...$ for an exact match)
    TEXT_EN = find_col(r"^English$")
    TEXT_CN = find_col(r"^Chinese$")
    TEXT_MIX = find_col(r"^Mixed$")
    LABEL_EN = "Final_Label_EN"
    LABEL_CN = "Final_Label_CN"
    LABEL_MIX = "Final_Label_MIX"

    if not any(c in df.columns for c in [TEXT_EN, TEXT_CN, TEXT_MIX]):
        print(f"[ERROR]: Could not find required 'result' columns for {model_name}. Skipping.")
        return

    # Normalise labels so the later comparison against 'refuse' is reliable
    for lab in [LABEL_EN, LABEL_CN, LABEL_MIX]:
        if lab in df.columns:
            df[lab] = df[lab].astype(str).str.lower().str.strip()

    # Fall back to the row position as the id when the file has no "id" column
    if "id" not in df.columns:
        df = df.reset_index().rename(columns={"index": "id"})
    df = df.rename(columns={"Rewrite Method": "method", "Category": "category"}, errors='ignore')
    df_cn = df.copy()

    # 3. Feature Extraction (with persistent cache and cleanup)
    # Each variant is (short name, text column, label column, Stanza language code).
    # The Mixed variant is parsed with the "zh" pipeline.
    VARIANTS: List[Tuple[str, str, str, str]] = []
    if TEXT_EN and TEXT_EN in df_cn.columns:
        VARIANTS.append(("EN", TEXT_EN, LABEL_EN, "en"))
    if TEXT_CN and TEXT_CN in df_cn.columns:
        VARIANTS.append(("CN", TEXT_CN, LABEL_CN, "zh"))
    if TEXT_MIX and TEXT_MIX in df_cn.columns:
        VARIANTS.append(("MIX", TEXT_MIX, LABEL_MIX, "zh"))

    df_feat: pd.DataFrame | None = None

    # Try loading from cache
    if CACHE_PATH.exists():
        try:
            df_feat_cached = pd.read_csv(CACHE_PATH)
            print(f"[INFO]: Features loaded successfully from cache: {CACHE_PATH}")
            required_cols = [f"dep_depth_mean_{v[0]}" for v in VARIANTS] + [f"lexical_information_entropy_{v[0]}" for v in VARIANTS]

            # Integrity check: expected feature columns present and same row count as the input.
            # NOTE(review): label columns are taken from the cache as-is and are not part of
            # this check, so relabelling the CSV without deleting the cache presumably keeps
            # the old labels - confirm.
            if all(col in df_feat_cached.columns for col in required_cols) and len(df_feat_cached) == len(df_cn):
                df_feat_cached["id"] = pd.to_numeric(df_feat_cached["id"], errors="coerce").astype("Int64")
                df_cn["id"] = pd.to_numeric(df_cn["id"], errors="coerce").astype("Int64")
                df_feat = df_feat_cached
            else:
                 print("[WARN]: Cache file is incomplete or outdated. Recalculating features.")
        except Exception as e:
            print(f"[ERROR]: Failed to read cache file {CACHE_PATH}. Recalculating features. Error: {e}")

    if df_feat is None:
        feature_frames = []
        df_cn["id"] = pd.to_numeric(df_cn["id"], errors="coerce").astype("Int64")

        for name, text_col, label_col, lang_code in VARIANTS:
            print(f">> Computing features for {name} using column '{text_col}'...")

            # Load Stanza model
            nlp = get_nlp(lang_code)

            # One feature dict per input row, keys suffixed with the variant name
            rows = []
            for _id, text in tqdm(df_cn[["id", text_col]].itertuples(index=False, name=None), total=len(df_cn)):
                feats = stanza_features_for_text(text, nlp)
                rows.append({f"{k}_{name}": v for k, v in feats.items()})

            df_f = pd.DataFrame(rows)
            # Assignment aligns on the index; assumes df_cn still has a default 0..n-1 index - TODO confirm
            df_f["id"] = df_cn["id"]
            df_f.drop(columns=[c for c in df_f.columns if c.startswith('id_')], inplace=True, errors='ignore')

            if label_col in df_cn.columns:
                df_f = df_f.merge(df_cn[["id", label_col]], on="id", how='left')

            feature_frames.append(df_f)

            # [CRITICAL STEP: MEMORY CLEANUP] Release GPU memory after processing a language model
            # NOTE(review): CN and MIX both use "zh", so that pipeline is released here and
            # loaded again for the next variant.
            release_nlp(lang_code)

        # Merge features from all languages
        if len(feature_frames) == 1:
            df_feat = feature_frames[0].copy()
        else:
            df_feat = reduce(lambda left, right: pd.merge(left, right, on='id', how='outer'), feature_frames)

        # Save features to cache file
        try:
            df_feat.to_csv(CACHE_PATH, index=False)
            print(f"[INFO]: Features saved to cache: {CACHE_PATH}")
        except Exception as e:
            print(f"[WARN]: Could not save features to cache file {CACHE_PATH}. Error: {e}")

    # 4. Plotting

    # Define feature column names
    DEP_EN  = f"dep_depth_mean_EN"
    ENT_EN  = f"lexical_information_entropy_EN"
    DEP_CN  = f"dep_depth_mean_CN"
    ENT_CN  = f"lexical_information_entropy_CN"
    DEP_MIX = f"dep_depth_mean_MIX"
    ENT_MIX = f"lexical_information_entropy_MIX"

    dep_cols = [DEP_EN, DEP_CN, DEP_MIX]
    ent_cols = [ENT_EN, ENT_CN, ENT_MIX]

    def get_global_extents(df_feat: pd.DataFrame, dep_cols: List[str], ent_cols: List[str]):
        # Note: the global x range is still computed from all rows (including 'answer'),
        # so that every chart of a feature shares the same x axis.
        vals_dep = [df_feat[col].dropna().values for col in dep_cols if col in df_feat.columns]
        valid_vals_dep = [v for v in vals_dep if v.size > 0]
        x_extent_dep = _domain_x(np.concatenate(valid_vals_dep)) if valid_vals_dep and np.concatenate(valid_vals_dep).size > 0 else None

        # The y range is deliberately left unset: only 'refuse' rows are displayed now,
        # so the count axis is left to layered_hist_with_labels, which works on the
        # filtered data.
        y_domain_dep = None

        vals_ent = [df_feat[col].dropna().values for col in ent_cols if col in df_feat.columns]
        valid_vals_ent = [v for v in vals_ent if v.size > 0]
        x_extent_ent = _domain_x(np.concatenate(valid_vals_ent)) if valid_vals_ent and np.concatenate(valid_vals_ent).size > 0 else None
        y_domain_ent = None

        return x_extent_dep, y_domain_dep, x_extent_ent, y_domain_ent

    x_extent_dep, y_domain_dep, x_extent_ent, y_domain_ent = get_global_extents(df_feat, dep_cols, ent_cols)

    # Generate Dependency Depth Charts
    charts_dep = []
    # Note: the call is unchanged, but the callee now plots only the 'refuse' rows
    if DEP_EN in df_feat.columns and LABEL_EN in df_feat.columns: charts_dep.append(layered_hist_with_labels(df_feat, LABEL_EN, DEP_EN, "Avg Dependency Tree Depth (English)", BINS, 320, 230, x_extent_dep, y_domain_dep))
    if DEP_CN in df_feat.columns and LABEL_CN in df_feat.columns: charts_dep.append(layered_hist_with_labels(df_feat, LABEL_CN, DEP_CN, "Avg Dependency Tree Depth (Chinese)", BINS, 320, 230, x_extent_dep, y_domain_dep))
    if DEP_MIX in df_feat.columns and LABEL_MIX in df_feat.columns: charts_dep.append(layered_hist_with_labels(df_feat, LABEL_MIX, DEP_MIX, "Avg Dependency Tree Depth (Mixed)", BINS, 320, 230, x_extent_dep, y_domain_dep))
    row1 = alt.hconcat(*charts_dep).resolve_scale(color="independent", y="independent") if charts_dep else alt.Chart(pd.DataFrame()).mark_text(text="No Dependency Depth Charts").properties(width=960, height=230)

    # Generate Lexical Information Entropy Charts
    charts_ent = []
    if ENT_EN in df_feat.columns and LABEL_EN in df_feat.columns: charts_ent.append(layered_hist_with_labels(df_feat, LABEL_EN, ENT_EN, "Lexical Information Entropy (English)", BINS, 320, 230, x_extent_ent, y_domain_ent))
    if ENT_CN in df_feat.columns and LABEL_CN in df_feat.columns: charts_ent.append(layered_hist_with_labels(df_feat, LABEL_CN, ENT_CN, "Lexical Information Entropy (Chinese)", BINS, 320, 230, x_extent_ent, y_domain_ent))
    if ENT_MIX in df_feat.columns and LABEL_MIX in df_feat.columns: charts_ent.append(layered_hist_with_labels(df_feat, LABEL_MIX, ENT_MIX, "Lexical Information Entropy (Mixed)", BINS, 320, 230, x_extent_ent, y_domain_ent))
    row2 = alt.hconcat(*charts_ent).resolve_scale(color="independent", y="independent") if charts_ent else alt.Chart(pd.DataFrame()).mark_text(text="No Lexical Entropy Charts").properties(width=960, height=230)

    # Combine charts and add the professional model name title
    final_chart = (row1 & row2).properties(
        title=alt.TitleParams(
            text=f"Prompt Structural Analysis of Refusal Behavior ({model_name})",
            anchor="middle",
            orient="bottom",
            dy=8,
        )
    )

    print(f"==================== Analysis Completed for Model: {model_name} ====================\n")
    return final_chart

# --- MAIN EXECUTION LOOP ---
MODEL_SLUGS = list(MODEL_MAPPING.keys())
print(f"[INFO]: Total {len(MODEL_SLUGS)} models configured for analysis.")
print("-" * 60)

# 1. Define and create the directory the charts are saved to
SAVE_DIR = Path("images_2")
SAVE_DIR.mkdir(parents=True, exist_ok=True)
print(f"[INFO]: Chart save directory created/verified: {SAVE_DIR.resolve()}")

all_charts = []

# Iterate through the models
for model_slug, model_name in MODEL_MAPPING.items():
    chart = process_single_model(model_slug, model_name, BASE_DATA_DIR)

    # process_single_model returns None when the model's data could not be used;
    # test for that explicitly instead of relying on the chart object's truthiness.
    if chart is None:
        continue

    all_charts.append(chart)
    # To show the chart immediately in a notebook, call display(chart) here.
    # (A bare `chart` expression inside a loop renders nothing.)

    # 2. Save the chart specification to the output directory
    file_name = f"{model_slug}_analysis_chart_refusal_only.json"
    save_path = SAVE_DIR / file_name

    try:
        # Pass a plain string: the output format is inferred from the file
        # name, and a str is accepted for that regardless of Altair version.
        chart.save(str(save_path))
        print(f"[INFO]: Chart saved for {model_name} to {save_path}")
    except Exception as e:
        print(f"[ERROR]: Failed to save chart for {model_name} to {save_path}. Error: {e}")

print(f"[INFO]: All {len(MODEL_SLUGS)} models processed.")
if not all_charts:
    print("[ERROR]: No charts were generated successfully.")

2025-11-10 17:00:24 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


[INFO]: GPU Acceleration Status: Disabled (Device: cpu)
[INFO]: Total 5 models configured for analysis.
------------------------------------------------------------
[INFO]: Chart save directory created/verified: /Users/ziyin/Workspace/MyLabs/labs2025s2/capstone/code/USYD-25S2-Capstone-CS62-2/evaluation/images_2

>> Computing features for EN using column 'English'...
[INFO]: Loading Stanza Pipeline for language 'en'...


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.11.0.json:   0%|  …

2025-11-10 17:00:24 INFO: Downloaded file to /Users/ziyin/stanza_resources/resources.json
2025-11-10 17:00:24 INFO: Loading these models for language: en (English):
| Processor | Package           |
---------------------------------
| tokenize  | combined          |
| mwt       | combined          |
| pos       | combined_charlm   |
| lemma     | combined_nocharlm |
| depparse  | combined_charlm   |

2025-11-10 17:00:24 INFO: Using device: cpu
2025-11-10 17:00:24 INFO: Loading: tokenize
2025-11-10 17:00:25 INFO: Loading: mwt
2025-11-10 17:00:25 INFO: Loading: pos
2025-11-10 17:00:25 INFO: Loading: lemma
2025-11-10 17:00:26 INFO: Loading: depparse
2025-11-10 17:00:26 INFO: Done loading processors!
  4%|▎         | 21/600 [00:09<04:08,  2.33it/s]


KeyboardInterrupt: 