<a href="https://colab.research.google.com/github/tigerzhao0/UTD-Summer-2025/blob/main/UTD2025_FMEA_Severity_with_Test_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ───────────────────────────────────────────────────────────
# Cell 1: Install Dependencies (Standard Hugging Face QLoRA)
# ───────────────────────────────────────────────────────────
# Announce the start of the (quiet, `-q`) dependency installation.
print("⏳ Installing standard Hugging Face libraries for QLoRA...")

# Install specific versions known to work reasonably well together
# Pinning versions helps ensure compatibility
# NOTE(review): MODEL_NAME configured later in this notebook is a Llama 3.1
# checkpoint; transformers 4.41.1 may predate Llama 3.1 support -- confirm
# before relying on these pins (a later cell installs unpinned releases).
!pip install -q accelerate==0.30.1
!pip install -q peft==0.11.1
!pip install -q bitsandbytes==0.43.1 # Use a known stable bnb version
!pip install -q transformers==4.41.1 # Use version compatible with PEFT/Accelerate
!pip install -q datasets==2.19.0
!pip install -q scikit-learn==1.4.2
!pip install -q pandas
!pip install -q openpyxl # To read .xlsx files with pandas

# Note: We are NOT installing unsloth.

# Optional: Uninstall xformers (Uncomment if you encounter issues on T4/A100 later)
# print("🔧 Attempting to uninstall xformers (if present)...")
# !pip uninstall -y xformers
# print("✅ xformers uninstalled (if present).")


# Remind the user that the runtime must be restarted before the next cells run.
print("\n✅ Installations finished.")
print("🔴 IMPORTANT: Go to 'Runtime' > 'Restart runtime' in the menu NOW before proceeding!")

⏳ Installing standard Hugging Face libraries for QLoRA...

✅ Installations finished.
🔴 IMPORTANT: Go to 'Runtime' > 'Restart runtime' in the menu NOW before proceeding!


In [None]:
# ───────────────────────────────────────────────────────────
# Cell 1: Install Dependencies (Latest Standard Hugging Face QLoRA for Llama 3.1)
# ───────────────────────────────────────────────────────────
# Announce the start of the (quiet, `-q`) dependency installation.
print("⏳ Installing latest standard Hugging Face libraries for QLoRA...")

# Upgrade PEFT and bitsandbytes to their latest releases first.
# (Plain package names are used; no pip "extras" are requested.)
!pip install -q peft --upgrade
!pip install -q bitsandbytes --upgrade

# Upgrade the remaining core libraries without version pins.
# bitsandbytes is listed again here although the command above already upgraded it.
!pip install -q accelerate bitsandbytes "transformers[torch]" --upgrade

# Supporting libraries: datasets and scikit-learn stay pinned, pandas and
# openpyxl are installed unpinned.
!pip install -q datasets==2.19.0 scikit-learn==1.4.2 pandas openpyxl

# Optional: Uninstall xformers
# print("🔧 Attempting to uninstall xformers (if present)...")
# !pip uninstall -y xformers
# print("✅ xformers uninstalled (if present).")

# Remind the user that the runtime must be restarted before the next cells run.
print("\n✅ Installations finished.")
print("🔴 IMPORTANT: Go to 'Runtime' > 'Restart runtime' in the menu NOW before proceeding!")

⏳ Installing latest standard Hugging Face libraries for QLoRA...

✅ Installations finished.
🔴 IMPORTANT: Go to 'Runtime' > 'Restart runtime' in the menu NOW before proceeding!


In [None]:
# ───────────────────────────────────────────────────────────
# Cell 2: Restart Runtime!
# ───────────────────────────────────────────────────────────
import os
# Terminate the current kernel process by sending it signal 9. The install
# cells ask for a runtime restart before proceeding; this does it from code.
# NOTE(review): presumably Colab brings up a fresh runtime automatically after
# the kill -- confirm. Nothing placed after os.kill() in this cell will run.
print("💥 Sending signal to restart runtime...")
os.kill(os.getpid(), 9)

In [None]:
from google.colab import files
# Ask the user to upload the FMEA spreadsheet into the runtime's working
# directory; the file name must match CSV_PATH in the configuration cell.
# `uploaded` keeps whatever files.upload() returns (presumably a mapping of
# file name to file content -- confirm against the Colab docs).
uploaded = files.upload()

Saving function2.4 0428_10level.xlsx to function2.4 0428_10level.xlsx


In [None]:
# ───────────────────────────────────────────────────────────
# Cell 3: Imports and Configuration (No Unsloth)
# ───────────────────────────────────────────────────────────
# Removed unsloth imports, added BitsAndBytesConfig, prepare_model_for_kbit_training
import torch
import pandas as pd
import numpy as np
from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification, # Use standard class
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    BitsAndBytesConfig # <<< Import for manual QLoRA config
)
from peft import (
    get_peft_model,
    LoraConfig,
    TaskType,
    prepare_model_for_kbit_training # <<< Import for manual QLoRA setup
)
import warnings
warnings.filterwarnings("ignore")
from huggingface_hub import notebook_login # Keep login for Llama 3.1

# --- Configuration ---
# Data file uploaded to the runtime. The variable is called CSV_PATH, but the
# file configured here is an .xlsx workbook.
CSV_PATH = "function2.4 0428_10level.xlsx"

# Spreadsheet header names (must match the headers after whitespace clean-up).
COL_SUBFUNCTION = "Subfunction"
COL_REQUIREMENTS = "Requirements"
COL_FAILURE_MODE = "Potential Failure Mode and descriptions"
COL_EFFECT_PRIMARY = "Potential Effect(s) of Failure (primary)"
COL_EFFECT_SECONDARY = "Potential Effect(s) of Failure (secondary)"
COL_SEVERITY = "Severity"  # classification target

# Text columns that are concatenated into the model input.
INPUT_TEXT_COLS = [
    COL_SUBFUNCTION,
    COL_REQUIREMENTS,
    COL_FAILURE_MODE,
    COL_EFFECT_PRIMARY,
    COL_EFFECT_SECONDARY,
]
# The first three input columns are the ones that get forward-filled.
COLS_TO_FORWARD_FILL = INPUT_TEXT_COLS[:3]
TARGET_COLUMN = COL_SEVERITY
NUM_LABELS = 10  # Severity 1-10

# Model
MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct"
MAX_SEQ_LENGTH = 512

# Training
OUTPUT_DIR = "fmea_severity_classifier_llama31_8b_standard_qlora"
LEARNING_RATE = 1e-4  # QLoRA starting point
NUM_EPOCHS = 3  # adjust as needed (a MAX_STEPS limit, e.g. 500, is the alternative)
BATCH_SIZE_PER_DEVICE = 1  # start small for an 8B model; raise if VRAM allows
GRAD_ACCUMULATION_STEPS = 16  # effective batch size = 1 * 16
LORA_R, LORA_ALPHA, LORA_DROPOUT = 16, 32, 0.05
LOGGING_STEPS = 10
SAVE_STRATEGY = EVAL_STRATEGY = "epoch"
# --- End of Configuration ---

# Device selection: CUDA when available, otherwise CPU (with a warning).
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")
if device.type == 'cpu':
    print("⚠️ Warning: Running on CPU!")

# Label mappings: class id 0..9 <-> severity string "1".."10".
labels_list = list(map(str, range(1, 11)))
id2label = dict(enumerate(labels_list))
label2id = {label: idx for idx, label in id2label.items()}
print(f"id2label mapping: {id2label}")
print(f"label2id mapping: {label2id}")

# Compute dtype for the quantization config: bfloat16 on GPUs with compute
# capability major version >= 8, float16 everywhere else (silent on CPU).
if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
    compute_dtype = torch.bfloat16
    print("Compute dtype set to bfloat16 for Ampere+ GPU.")
else:
    compute_dtype = torch.float16
    if torch.cuda.is_available():
        print("Compute dtype set to float16.")

Using device: cuda
id2label mapping: {0: '1', 1: '2', 2: '3', 3: '4', 4: '5', 5: '6', 6: '7', 7: '8', 8: '9', 9: '10'}
label2id mapping: {'1': 0, '2': 1, '3': 2, '4': 3, '5': 4, '6': 5, '7': 6, '8': 7, '9': 8, '10': 9}
Compute dtype set to bfloat16 for Ampere+ GPU.


In [None]:
# ───────────────────────────────────────────────────────────
# ORIGINAL VERSION of Cell 4 (train/validation split only), kept for reference;
# a later Cell 4 variant additionally creates a test split.
# Cell 4: Load and Preprocess Data
# ───────────────────────────────────────────────────────────
# This cell's code remains the same as the last working version.
# Reads Excel/CSV, cleans headers, forward-fills, combines text features
# into 'text' column, prepares 'label' column (0-9), splits data,
# and converts to DatasetDict. Normalization is OFF by default.

print(f"⏳ Loading data from '{CSV_PATH}'...")
try:
    # Try the Excel reader first; on any failure fall back to the CSV reader.
    try:
        df = pd.read_excel(CSV_PATH)
    except Exception:
        df = pd.read_csv(CSV_PATH)

    # Keep the raw headers, then normalise them: remove newlines, collapse
    # runs of spaces into one, and trim both ends.
    original_columns = df.columns.tolist()
    df.columns = (
        df.columns
        .str.replace('\n', '', regex=False)
        .str.replace(' +', ' ', regex=True)
        .str.strip()
    )
    cleaned_columns = df.columns.tolist()

    # cleaned header -> raw header
    column_map = dict(zip(cleaned_columns, original_columns))
    print(f"✅ Loaded {len(df)} rows. Cleaned columns: {cleaned_columns}")
except Exception as e:
    print(f"❌ Error loading data: {e}")
    raise


# Function to get cleaned name robustly (optional, can hardcode if sure)
def get_cleaned_name(config_name, df_cols, original_map):
    """Resolve a configured column name against the available columns.

    The configured name is normalised the same way the headers are: newlines
    are removed and any run of whitespace collapses to a single space.

    Args:
        config_name: Column name as written in the configuration.
        df_cols: Collection of available column names (supports ``in``).
        original_map: Mapping consulted with the normalised name when that
            name is not among ``df_cols``.

    Returns:
        The normalised name if it is in ``df_cols``; otherwise the mapped
        name if that one is in ``df_cols``; otherwise ``config_name``
        unchanged. A warning is printed in the latter two cases.
    """
    without_newlines = str(config_name).replace('\n', '')
    normalized = ' '.join(without_newlines.split())
    if normalized in df_cols:
        return normalized

    # Normalised name is unknown: warn, then try the mapped name as a fallback.
    fallback = original_map.get(normalized, config_name)
    print(f"   Warning: Configured column '{fallback}' -> '{normalized}' not found after cleaning. Check CSV/Excel headers and config variables.")
    return fallback if fallback in df_cols else config_name


# Update configured names based on cleaned names IN THE DATAFRAME
# Resolve every configured header against the cleaned DataFrame columns.
# The secondary-effect column is not resolved or used in this cell.
(
    COL_SUBFUNCTION,
    COL_REQUIREMENTS,
    COL_FAILURE_MODE,
    COL_EFFECT_PRIMARY,
    COL_SEVERITY,
) = [
    get_cleaned_name(configured, df.columns, column_map)
    for configured in (
        COL_SUBFUNCTION,
        COL_REQUIREMENTS,
        COL_FAILURE_MODE,
        COL_EFFECT_PRIMARY,
        COL_SEVERITY,
    )
]
INPUT_TEXT_COLS = [COL_SUBFUNCTION, COL_REQUIREMENTS, COL_FAILURE_MODE, COL_EFFECT_PRIMARY]
COLS_TO_FORWARD_FILL = INPUT_TEXT_COLS[:3]
TARGET_COLUMN = COL_SEVERITY
all_needed_columns = [*INPUT_TEXT_COLS, TARGET_COLUMN]
print(f"   Using effective columns: {all_needed_columns}")

# Abort early when any required column is absent.
missing_cols = [col for col in all_needed_columns if col not in df.columns]
if missing_cols:
    print(f"❌ Error: Columns missing: {missing_cols}")
    raise ValueError("Missing columns")

# Preprocessing: keep the needed columns, forward-fill the grouping columns,
# then drop every row that still contains a NaN.
print("⏳ Preprocessing data...")
df_selected = df[all_needed_columns].copy()
print(f"   Forward filling columns: {COLS_TO_FORWARD_FILL}...")
df_selected[COLS_TO_FORWARD_FILL] = df_selected[COLS_TO_FORWARD_FILL].ffill()
initial_rows = len(df_selected)
df_selected = df_selected.dropna()
final_rows = len(df_selected)
if final_rows < initial_rows:
    print(f"   Dropped {initial_rows - final_rows} rows with NaN values.")
if not final_rows:
    raise ValueError("No data left after NaN drop")

# Severity: coerce to numeric (non-numeric values become NaN and are dropped),
# cast to int, then keep only values inside [1, 10].
try:
    df_selected[TARGET_COLUMN] = pd.to_numeric(df_selected[TARGET_COLUMN], errors='coerce')
    df_selected = df_selected.dropna(subset=[TARGET_COLUMN])
    df_selected[TARGET_COLUMN] = df_selected[TARGET_COLUMN].astype(int)
except Exception as e:
    print(f"❌ Error converting Severity: {e}")
    raise
initial_rows = len(df_selected)
df_selected = df_selected[
    df_selected[TARGET_COLUMN].ge(1) & df_selected[TARGET_COLUMN].le(10)
]
final_rows = len(df_selected)
if final_rows < initial_rows:
    print(f"   Removed {initial_rows - final_rows} rows with Severity outside [1, 10].")
if not final_rows:
    raise ValueError("No data left with valid Severity (1-10)")

# Combine Text Features
def combine_features(row):
    """Build the model input text for one DataFrame row.

    Emits one ``"<label>: <value>"`` line per column in ``INPUT_TEXT_COLS``,
    joined with newlines. The label is the column name cut at its first
    ``'('`` and stripped; missing values are rendered as an empty string.

    NOTE(review): cutting at the first ``'('`` turns
    "Potential Effect(s) of Failure (primary)" into "Potential Effect" --
    confirm that this label is the intended one.
    """
    def render(col):
        cell = row[col]
        value = str(cell) if pd.notna(cell) else ""
        return f"{col.split('(')[0].strip()}: {value}"

    return "\n".join(render(col) for col in INPUT_TEXT_COLS)
print("   Combining input text features into 'text' column...")
df_selected['text'] = df_selected.apply(combine_features, axis=1)

# Class ids are zero-based: severity 1..10 -> label 0..9.
df_selected['label'] = df_selected[TARGET_COLUMN].sub(1)
print(f"   Created 'label' column (0-9) from '{TARGET_COLUMN}'.")

# Only the model input and the label are needed from here on.
df_final = df_selected[['text', 'label']]

# Stratified 80/20 train/validation split.
print("⏳ Splitting data...")
train_df, valid_df = train_test_split(
    df_final,
    test_size=0.2,
    random_state=42,
    stratify=df_final['label'],
)
print(f"✅ Split complete. Train size: {len(train_df)}, Validation size: {len(valid_df)}")

# Wrap both frames as Hugging Face datasets (the pandas index is not kept).
train_dataset, valid_dataset = (
    Dataset.from_pandas(frame, preserve_index=False)
    for frame in (train_df, valid_df)
)
raw_datasets = DatasetDict({
    'train': train_dataset,
    'validation': valid_dataset,
})
print("✅ Data prepared and converted to Hugging Face Datasets format.")
print(raw_datasets)

# Optional cleanup of the intermediate DataFrames.
import gc
del df, df_selected, df_final, train_df, valid_df
gc.collect()

⏳ Loading data from 'function2.4 0428_10level.xlsx'...
✅ Loaded 1923 rows. Cleaned columns: ['Subfunction', 'Requirements', 'Potential Failure Mode and descriptions', 'Potential Effect(s) of Failure (primary)', 'Standardised Statement', 'Severity', 'Confidence']
   Using effective columns: ['Subfunction', 'Requirements', 'Potential Failure Mode and descriptions', 'Potential Effect(s) of Failure (primary)', 'Severity']
⏳ Preprocessing data...
   Forward filling columns: ['Subfunction', 'Requirements', 'Potential Failure Mode and descriptions']...
   Combining input text features into 'text' column...
   Created 'label' column (0-9) from 'Severity'.
⏳ Splitting data...
✅ Split complete. Train size: 1538, Validation size: 385
✅ Data prepared and converted to Hugging Face Datasets format.
DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1538
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 385
    })
})


152

In [None]:
# ───────────────────────────────────────────────────────────
# Cell 4: Load and Preprocess Data (Split into Train/Validation/Test)
# ───────────────────────────────────────────────────────────
# This cell's code remains the same as the last working version,
# but now splits the data into train, validation, and test sets.

print(f"⏳ Loading data from '{CSV_PATH}'...")
try:
    # Excel reader first; any failure falls through to the CSV reader.
    try:
        df = pd.read_excel(CSV_PATH)
    except Exception:
        df = pd.read_csv(CSV_PATH)

    # Record the raw headers before normalising them (newlines removed,
    # repeated spaces collapsed, surrounding whitespace trimmed).
    original_columns = df.columns.tolist()
    df.columns = (
        df.columns
        .str.replace('\n', '', regex=False)
        .str.replace(' +', ' ', regex=True)
        .str.strip()
    )
    cleaned_columns = df.columns.tolist()

    # Lookup from cleaned header back to the raw header.
    column_map = dict(zip(cleaned_columns, original_columns))
    print(f"✅ Loaded {len(df)} rows. Cleaned columns: {cleaned_columns}")
except Exception as e:
    print(f"❌ Error loading data: {e}")
    raise

# Function to get cleaned name robustly (optional, can hardcode if sure)
def get_cleaned_name(config_name, df_cols, original_map):
    """Map a configured column name onto one of the available columns.

    Normalisation mirrors the header clean-up: newlines are dropped and runs
    of whitespace become a single space.

    Args:
        config_name: Column name from the configuration.
        df_cols: Available column names (anything supporting ``in``).
        original_map: Mapping looked up with the normalised name when that
            name is not present in ``df_cols``.

    Returns:
        The normalised name when present in ``df_cols``. Otherwise a warning
        is printed and the mapped name is returned if it is present in
        ``df_cols``; failing that, ``config_name`` is returned as given.
    """
    normalized = ' '.join(str(config_name).replace('\n', '').split())
    if normalized in df_cols:
        return normalized

    fallback = original_map.get(normalized, config_name)
    print(f"   Warning: Configured column '{fallback}' -> '{normalized}' not found after cleaning. Check CSV/Excel headers and config variables.")
    if fallback in df_cols:
        return fallback
    return config_name

# Update configured names based on cleaned names IN THE DATAFRAME
# Resolve the configured headers (in a fixed order) against the cleaned
# DataFrame columns. The secondary-effect column is not resolved or used here.
(
    COL_SUBFUNCTION,
    COL_REQUIREMENTS,
    COL_FAILURE_MODE,
    COL_EFFECT_PRIMARY,
    COL_SEVERITY,
) = [
    get_cleaned_name(configured, df.columns, column_map)
    for configured in (
        COL_SUBFUNCTION,
        COL_REQUIREMENTS,
        COL_FAILURE_MODE,
        COL_EFFECT_PRIMARY,
        COL_SEVERITY,
    )
]
INPUT_TEXT_COLS = [COL_SUBFUNCTION, COL_REQUIREMENTS, COL_FAILURE_MODE, COL_EFFECT_PRIMARY]
COLS_TO_FORWARD_FILL = INPUT_TEXT_COLS[:3]
TARGET_COLUMN = COL_SEVERITY
all_needed_columns = [*INPUT_TEXT_COLS, TARGET_COLUMN]
print(f"   Using effective columns: {all_needed_columns}")

# Stop here if any required column is absent from the sheet.
missing_cols = [col for col in all_needed_columns if col not in df.columns]
if missing_cols:
    print(f"❌ Error: Columns missing: {missing_cols}")
    raise ValueError("Missing columns")

# Preprocessing: select columns, forward-fill the grouping columns, and drop
# rows that still hold a NaN afterwards.
print("⏳ Preprocessing data...")
df_selected = df[all_needed_columns].copy()
print(f"   Forward filling columns: {COLS_TO_FORWARD_FILL}...")
df_selected[COLS_TO_FORWARD_FILL] = df_selected[COLS_TO_FORWARD_FILL].ffill()
initial_rows = len(df_selected)
df_selected = df_selected.dropna()
final_rows = len(df_selected)
if final_rows < initial_rows:
    print(f"   Dropped {initial_rows - final_rows} rows with NaN values.")
if not final_rows:
    raise ValueError("No data left after NaN drop")

# Severity: coerce to numeric (unparseable values become NaN and are
# dropped), cast to int, and keep only the inclusive range 1..10.
try:
    df_selected[TARGET_COLUMN] = pd.to_numeric(df_selected[TARGET_COLUMN], errors='coerce')
    df_selected = df_selected.dropna(subset=[TARGET_COLUMN])
    df_selected[TARGET_COLUMN] = df_selected[TARGET_COLUMN].astype(int)
except Exception as e:
    print(f"❌ Error converting Severity: {e}")
    raise
initial_rows = len(df_selected)
df_selected = df_selected[
    df_selected[TARGET_COLUMN].ge(1) & df_selected[TARGET_COLUMN].le(10)
]
final_rows = len(df_selected)
if final_rows < initial_rows:
    print(f"   Removed {initial_rows - final_rows} rows with Severity outside [1, 10].")
if not final_rows:
    raise ValueError("No data left with valid Severity (1-10)")

# Combine Text Features
def combine_features(row):
    """Render one DataFrame row as the newline-separated model input text.

    For every column in ``INPUT_TEXT_COLS`` a ``"<label>: <value>"`` line is
    produced, where the label is the column name up to its first ``'('``
    (stripped) and a missing value is written as an empty string.

    NOTE(review): the cut at the first ``'('`` shortens
    "Potential Effect(s) of Failure (primary)" to "Potential Effect" --
    confirm that this is the intended label.
    """
    labelled = []
    for col in INPUT_TEXT_COLS:
        cell = row[col]
        prefix = col.partition('(')[0].strip()
        labelled.append(prefix + ": " + (str(cell) if pd.notna(cell) else ""))
    return "\n".join(labelled)
print("   Combining input text features into 'text' column...")
df_selected['text'] = df_selected.apply(combine_features, axis=1)

# Class ids are zero-based: severity 1..10 -> label 0..9.
df_selected['label'] = df_selected[TARGET_COLUMN].sub(1)
print(f"   Created 'label' column (0-9) from '{TARGET_COLUMN}'.")

# Only the model input and the label are needed from here on.
df_final = df_selected[['text', 'label']]

print("⏳ Splitting data into train (60%), validation (20%), and test (20%)...")
# Step 1: hold out 20% of all rows as the test set (stratified by label).
train_val_df, test_df = train_test_split(
    df_final, test_size=0.2, random_state=42, stratify=df_final['label']
)

# Step 2: take 25% of the remaining 80% as validation (0.2 / 0.8 = 0.25),
# which leaves 60% of all rows for training.
val_size_proportion = 0.25
train_df, valid_df = train_test_split(
    train_val_df, test_size=val_size_proportion, random_state=42, stratify=train_val_df['label']
)

print(f"✅ Split complete. Train size: {len(train_df)}, Validation size: {len(valid_df)}, Test size: {len(test_df)}")

# Wrap the three frames as Hugging Face datasets (the pandas index is not kept).
train_dataset, valid_dataset, test_dataset = (
    Dataset.from_pandas(frame, preserve_index=False)
    for frame in (train_df, valid_df, test_df)
)

raw_datasets = DatasetDict({'train': train_dataset, 'validation': valid_dataset, 'test': test_dataset})
print("✅ Data prepared and converted to Hugging Face Datasets format.")
print(raw_datasets)

# Optional cleanup of the intermediate DataFrames.
import gc
del (
    df,
    df_selected,
    df_final,
    train_df,
    valid_df,
    test_df,
    train_val_df,
)
gc.collect()

⏳ Loading data from 'function2.4 0428_10level.xlsx'...
✅ Loaded 1923 rows. Cleaned columns: ['Subfunction', 'Requirements', 'Potential Failure Mode and descriptions', 'Potential Effect(s) of Failure (primary)', 'Standardised Statement', 'Severity', 'Confidence']
   Using effective columns: ['Subfunction', 'Requirements', 'Potential Failure Mode and descriptions', 'Potential Effect(s) of Failure (primary)', 'Severity']
⏳ Preprocessing data...
   Forward filling columns: ['Subfunction', 'Requirements', 'Potential Failure Mode and descriptions']...
   Combining input text features into 'text' column...
   Created 'label' column (0-9) from 'Severity'.
⏳ Splitting data into train (60%), validation (20%), and test (20%)...
✅ Split complete. Train size: 1153, Validation size: 385, Test size: 385
✅ Data prepared and converted to Hugging Face Datasets format.
DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1153
    })
    validation: Dataset({
        f

192

In [None]:
# ───────────────────────────────────────────────────────────
# Cell 5: Hugging Face Login and Tokenization
# ───────────────────────────────────────────────────────────

# --- Hugging Face Login (Required for Llama 3.1) ---
# The model repository is access-controlled: point the user to the access
# request page, then open the interactive Hugging Face login widget.
print(
    "IMPORTANT: You need to request access to meta-llama/Meta-Llama-3.1-8B-Instruct first.",
    "Go here: https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct",
    "\nPlease log in to Hugging Face using an Access Token with 'read' permission.",
    sep="\n",
)
notebook_login()

print("✅ Login process initiated.")
# --- End Login ---

print(f"\n⏳ Loading tokenizer for '{MODEL_NAME}'...")
# Tokenizer that belongs to the base model.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Without a pad token, reuse the end-of-sequence token for padding.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        print(f"   Tokenizer pad_token set to eos_token: {tokenizer.pad_token}")
    print("✅ Tokenizer loaded.")
except Exception as e:
    print(f"❌ Error loading tokenizer: {e}")
    raise

# Define tokenization function
def tokenize_function(examples):
    """Tokenize the ``"text"`` field of a batch of examples.

    Sequences are truncated to ``MAX_SEQ_LENGTH`` tokens and not padded here.

    Args:
        examples: Batch mapping that provides a ``"text"`` entry.

    Returns:
        Whatever the module-level ``tokenizer`` returns for that text.
    """
    texts = examples["text"]
    return tokenizer(
        texts,
        padding=False,
        truncation=True,
        max_length=MAX_SEQ_LENGTH,
    )

print("⏳ Tokenizing datasets...")
# Tokenize every split in batches; the raw 'text' column is dropped afterwards.
tokenized_datasets = raw_datasets.map(
    tokenize_function,
    batched=True,
    remove_columns=["text"],
)
print("✅ Datasets tokenized.")
print(tokenized_datasets)

IMPORTANT: You need to request access to meta-llama/Meta-Llama-3.1-8B-Instruct first.
Go here: https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct

Please log in to Hugging Face using an Access Token with 'read' permission.


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

✅ Login process initiated.

⏳ Loading tokenizer for 'meta-llama/Meta-Llama-3.1-8B-Instruct'...


tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

   Tokenizer pad_token set to eos_token: <|eot_id|>
✅ Tokenizer loaded.
⏳ Tokenizing datasets...


Map:   0%|          | 0/1153 [00:00<?, ? examples/s]

Map:   0%|          | 0/385 [00:00<?, ? examples/s]

Map:   0%|          | 0/385 [00:00<?, ? examples/s]

✅ Datasets tokenized.
DatasetDict({
    train: Dataset({
        features: ['label', 'input_ids', 'attention_mask'],
        num_rows: 1153
    })
    validation: Dataset({
        features: ['label', 'input_ids', 'attention_mask'],
        num_rows: 385
    })
    test: Dataset({
        features: ['label', 'input_ids', 'attention_mask'],
        num_rows: 385
    })
})


In [None]:
# ───────────────────────────────────────────────────────────
# Cell 6: Load Llama 3.1 Model (Standard QLoRA)
# ───────────────────────────────────────────────────────────

# --- Define Quantization Config ---
# Build the 4-bit quantization settings passed to from_pretrained() below:
# NF4 quantization type, double quantization enabled, and the compute dtype
# chosen earlier in the notebook from the GPU capability.
print("⚙️ Defining 4-bit quantization config (BitsAndBytesConfig)...")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype, # Determined in Cell 3 based on GPU
    bnb_4bit_use_double_quant=True,
)
print("✅ Quantization config defined.")

# --- Load Base Model with Quantization ---
# Loads the checkpoint with a sequence-classification head sized to NUM_LABELS
# and the id<->label mappings attached to the model config. The recorded run
# reports 'score.weight' as newly initialized, i.e. the classification head
# starts untrained.
print(f"⏳ Loading base model '{MODEL_NAME}' for Sequence Classification with 4-bit quantization...")
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto", # Use "auto" for standard HF, should work better on A100
    # device_map = {"": 0}, # Use explicit mapping if "auto" causes issues
    num_labels=NUM_LABELS,
    id2label=id2label,
    label2id=label2id,
    # ignore_mismatched_sizes=True # Try uncommenting if size mismatch error occurs
)
print("✅ Base model loaded with quantization.")

# Set pad token ID in model config if tokenizer has one (important!)
# NOTE(review): presumably the classification head uses pad_token_id to find
# the last non-padding token of each sequence -- confirm in the model docs.
if tokenizer.pad_token_id is not None:
    model.config.pad_token_id = tokenizer.pad_token_id
    print(f"Model pad_token_id set to: {model.config.pad_token_id}")

# --- Prepare Model for K-bit Training & Apply LoRA using PEFT ---
# Order matters here: enable gradient checkpointing, prepare the quantized
# model for training, and only then wrap it with the LoRA adapter.
print("⚙️ Preparing model for K-bit training and defining LoRA config (PEFT)...")
model.gradient_checkpointing_enable() # Often needed for K-bit training
# NOTE(review): prepare_model_for_kbit_training may enable gradient
# checkpointing by itself, which would make the call above redundant -- confirm.
model = prepare_model_for_kbit_training(model)

# LoRA settings: rank, alpha and dropout come from the configuration cell;
# adapters are attached to the attention and MLP projection modules listed
# below, and no bias terms are trained.
lora_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", # Standard Llama 3 targets
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type=TaskType.SEQ_CLS, # Specify Sequence Classification task
)
print("✅ LoRA configuration defined.")

# Wrap the base model with the adapter and report the trainable parameter count.
print("⚡️ Applying LoRA adapter to the model using PEFT...")
model = get_peft_model(model, lora_config) # Standard PEFT function
print("✅ LoRA adapter applied.")
model.print_trainable_parameters()

# --- Data Collator ---
# The tokenization step does not pad, so batches are padded by this collator.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
print("✅ Data collator created.")

⚙️ Defining 4-bit quantization config (BitsAndBytesConfig)...
✅ Quantization config defined.
⏳ Loading base model 'meta-llama/Meta-Llama-3.1-8B-Instruct' for Sequence Classification with 4-bit quantization...


config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Meta-Llama-3.1-8B-Instruct and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Base model loaded with quantization.
Model pad_token_id set to: 128009
⚙️ Preparing model for K-bit training and defining LoRA config (PEFT)...
✅ LoRA configuration defined.
⚡️ Applying LoRA adapter to the model using PEFT...
✅ LoRA adapter applied.
trainable params: 41,984,000 || all params: 7,546,949,632 || trainable%: 0.5563
✅ Data collator created.


In [None]:
# ───────────────────────────────────────────────────────────
# Cell 7: Training (Standard QLoRA - evaluation_strategy Workaround)
# ───────────────────────────────────────────────────────────
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import TrainingArguments, Trainer # Ensure these are imported
import torch # Ensure torch is imported

# --- Define Compute Metrics Function ---
# Keep this function as it's needed for manual evaluation later
def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision/recall/F1 for an evaluation pass.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` is either
            an array of per-class scores with shape ``(n_samples, n_classes)``
            or a tuple whose first element is that array (this happens when
            the model returns extra outputs besides the logits). ``labels``
            holds the integer class ids.

    Returns:
        dict with the keys ``'accuracy'``, ``'f1'``, ``'precision'`` and
        ``'recall'``. Precision/recall/F1 are weighted averages over classes;
        classes without predictions count as 0 instead of raising a warning.
    """
    predictions, labels = eval_pred
    # np.argmax cannot handle a tuple of differently shaped outputs; in that
    # case only the first element (the logits) is relevant.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    preds = np.argmax(predictions, axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='weighted', zero_division=0
    )
    acc = accuracy_score(labels, preds)
    return {'accuracy': acc, 'f1': f1, 'precision': precision, 'recall': recall}

# --- Define Training Arguments (Workaround Applied) ---
print("⚙️ Setting Training Arguments (evaluation_strategy workaround)...")

# Mixed-precision flags: bf16 on GPUs with compute capability major >= 8,
# fp16 on any other GPU, neither on CPU.
bf16_supported = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
fp16_enabled = torch.cuda.is_available() and not bf16_supported
if bf16_supported:
    print("   Setting bf16=True for Ampere+ GPU.")
elif fp16_enabled:
    print("   Setting fp16=True for non-Ampere GPU.")

# Training configuration. Checkpoints go to "<OUTPUT_DIR>_chkpts"; no
# evaluation runs during training (see the workaround section below), so the
# validation set is only evaluated manually after trainer.train() returns.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR + "_chkpts", # Use OUTPUT_DIR from Cell 3 config
    # --- Training Duration & Batching (Use config from Cell 3) ---
    num_train_epochs = NUM_EPOCHS,
    # max_steps = MAX_STEPS, # Alternatively use max_steps
    per_device_train_batch_size=BATCH_SIZE_PER_DEVICE,
    gradient_accumulation_steps=GRAD_ACCUMULATION_STEPS,
    learning_rate=LEARNING_RATE,

    # --- Optimizer & Precision ---
    optim="paged_adamw_8bit", # Recommended 8-bit optimizer for QLoRA
    fp16=fp16_enabled,        # Enable based on GPU check
    bf16=bf16_supported,      # Enable based on GPU check

    # --- Logging & Saving ---
    logging_strategy="steps",
    logging_steps=LOGGING_STEPS,
    save_strategy=SAVE_STRATEGY,     # e.g., "epoch" or "steps"
    # save_steps = SAVE_STEPS,      # Use if save_strategy="steps"
    save_total_limit=1,          # Optional: keep only last/best checkpoint

    # --- WORKAROUND APPLIED ---
    # The evaluation-related arguments are disabled, so EVAL_STRATEGY from the
    # configuration cell is not used here.
    # NOTE(review): newer transformers releases reportedly renamed
    # `evaluation_strategy` to `eval_strategy`; if that is what broke the
    # original argument, `eval_strategy=EVAL_STRATEGY` would restore per-epoch
    # evaluation -- confirm against the installed version.
    # evaluation_strategy="epoch", # <<< COMMENTED OUT / REMOVED
    # load_best_model_at_end=True, # <<< MUST be False if not evaluating during training
    # metric_for_best_model="f1",  # <<< Comment out / remove
    load_best_model_at_end=False,  # Explicitly set to False

    # --- Other Args ---
    seed=42,
    report_to="none",
    remove_unused_columns=True, # Safe if Cell 5 removed 'text' column
    gradient_checkpointing=True, # Recommended for standard QLoRA memory saving
    gradient_checkpointing_kwargs={'use_reentrant':False},
)

# --- Create Trainer ---
# Build the Trainer from the objects created in the earlier cells. A missing
# prerequisite surfaces as NameError and gets a dedicated hint; every error is
# re-raised after being reported.
print("⚙️ Creating Trainer...")
# Ensure model, tokenized_datasets, tokenizer, data_collator exist from previous cells
try:
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"], # Keep for manual eval
        # NOTE(review): recent transformers releases reportedly deprecate the
        # `tokenizer` argument in favour of `processing_class` -- confirm
        # against the installed version.
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics, # Keep for manual eval
    )
    print("✅ Trainer created.")
except NameError as ne:
    print(f"❌ NameError: A required object (model, dataset, etc.) not found: {ne}")
    print("   Please ensure Cells 3, 4, 5, 6 ran successfully.")
    raise
except Exception as e:
    print(f"❌ Unexpected error creating Trainer: {e}")
    raise


# --- Start Training ---
# Run the fine-tuning. A training failure is reported and re-raised, so none
# of the later steps run in that case.
print(f"\n🚀🚀🚀 Starting Standard QLoRA Fine-tuning! 🚀🚀🚀")
try:
    train_result = trainer.train() # Train the model
    print("\n✅✅✅ Training finished! ✅✅✅")
except Exception as e:
    print(f"❌ An error occurred during trainer.train(): {e}")
    raise

# --- !! Manually Evaluate Model AFTER Training !! ---
# Best effort: an evaluation failure is only printed (not re-raised), so the
# model is still saved below.
print("\n🧪 Evaluating model after training has completed...")
try:
    eval_results = trainer.evaluate(eval_dataset=tokenized_datasets["validation"])
    print("\n📊 Final Validation Set Evaluation Results (Manual Trigger):")
    print(eval_results)
    trainer.log_metrics("eval_manual", eval_results)
except Exception as e:
    print(f"❌ Error during manual evaluation: {e}")
# --- End Manual Evaluation ---


# --- Save Final Model State ---
# Note: Saves the model state at the END of training.
# Best effort as well: a save failure is printed but not re-raised.
print(f"\n💾 Saving final trained model adapter & tokenizer to '{OUTPUT_DIR}'...")
try:
    trainer.save_model(OUTPUT_DIR)
    # At cell (module) level locals() is the global namespace, so this checks
    # that a global `tokenizer` exists and is not None.
    if 'tokenizer' in locals() and tokenizer is not None: # Save tokenizer if available
         tokenizer.save_pretrained(OUTPUT_DIR)
    print(f"✅ Final model adapter and tokenizer saved to '{OUTPUT_DIR}'.")
except Exception as e:
     print(f"❌ Error saving model/tokenizer: {e}")


# --- Optional: Clean up GPU memory ---
import gc
# No objects are deleted here, so only unreferenced garbage and the CUDA
# allocator cache are released; model and trainer stay alive.
# Add del statements for objects no longer needed
# Example: del model, trainer, tokenized_datasets, raw_datasets
gc.collect()
if torch.cuda.is_available(): torch.cuda.empty_cache()
print("\n🧹 Training cell GPU memory cache potentially cleared.")

No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


⚙️ Setting Training Arguments (evaluation_strategy workaround)...
   Setting bf16=True for Ampere+ GPU.
⚙️ Creating Trainer...
✅ Trainer created.

🚀🚀🚀 Starting Standard QLoRA Fine-tuning! 🚀🚀🚀


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
10,2.8271
20,2.4924
30,2.0911
40,2.11
50,1.7699
60,1.9126
70,1.8872
80,1.5468
90,1.5282
100,1.7249



✅✅✅ Training finished! ✅✅✅

🧪 Evaluating model after training has completed...



📊 Final Validation Set Evaluation Results (Manual Trigger):
{'eval_loss': 1.7698661088943481, 'eval_accuracy': 0.24155844155844156, 'eval_f1': 0.21225991307725797, 'eval_precision': 0.2026424196535928, 'eval_recall': 0.24155844155844156, 'eval_runtime': 10.4234, 'eval_samples_per_second': 36.936, 'eval_steps_per_second': 4.701, 'epoch': 3.0}
***** eval_manual metrics *****
  epoch                   =        3.0
  eval_accuracy           =     0.2416
  eval_f1                 =     0.2123
  eval_loss               =     1.7699
  eval_precision          =     0.2026
  eval_recall             =     0.2416
  eval_runtime            = 0:00:10.42
  eval_samples_per_second =     36.936
  eval_steps_per_second   =      4.701

💾 Saving final trained model adapter & tokenizer to 'fmea_severity_classifier_llama31_8b_standard_qlora'...
✅ Final model adapter and tokenizer saved to 'fmea_severity_classifier_llama31_8b_standard_qlora'.

🧹 Training cell GPU memory cache potentially cleared.


In [None]:
# --- Optional Download Code Block (Run in new cell after Cell 7) ---
import shutil
from google.colab import files
import os
import time

folder_to_download = "fmea_severity_classifier_llama31_8b_standard_qlora"  # <<< Confirm this is the folder name saved by Cell 7
timestamp = time.strftime("%Y%m%d-%H%M%S")
# shutil.make_archive() appends the ".zip" extension to the base name it is
# given, so the extension-less base name is kept separately. Previously the
# archive was written as "<folder>.zip" while the download was requested for
# the timestamped name, which never existed ("Cannot find file").
zip_basename = f"{folder_to_download}_{timestamp}"
zip_filename = f"{zip_basename}.zip"

print(f"\n📦 Preparing folder '{folder_to_download}' for download...")
try:
    if os.path.isdir(folder_to_download):
        print(f"   Zipping folder to '{zip_filename}'...")
        # make_archive returns the path of the file it actually created;
        # downloading that path keeps the two steps in agreement.
        archive_path = shutil.make_archive(zip_basename, 'zip', folder_to_download)
        print(f"   Zipping complete.")
        print(f"⬇️ Triggering browser download for '{zip_filename}'...")
        files.download(archive_path)  # Trigger download
        print(f"✅ Download initiated. Check your browser.")
    else:
        print(f"❌ Error: Output directory '{folder_to_download}' not found. Cannot download.")
except Exception as e:
    print(f"❌ An error occurred during zipping or downloading: {e}")
# --- End Download Code Block ---


📦 Preparing folder 'fmea_severity_classifier_llama31_8b_standard_qlora' for download...
   Zipping folder to 'fmea_severity_classifier_llama31_8b_standard_qlora_20250620-201047.zip'...
   Zipping complete.
⬇️ Triggering browser download for 'fmea_severity_classifier_llama31_8b_standard_qlora_20250620-201047.zip'...
❌ An error occurred during zipping or downloading: Cannot find file: fmea_severity_classifier_llama31_8b_standard_qlora_20250620-201047.zip


In [None]:
# ───────────────────────────────────────────────────────────
# Cell 8: Evaluate on Validation Set (Detailed Report)
# ───────────────────────────────────────────────────────────
# This cell reloads the standard QLoRA model if needed and prints classification report.

# Imported in this cell so it does not depend on earlier cells having
# brought these names into the session.
import numpy as np
from sklearn.metrics import classification_report

print("\n📋 Generating detailed classification report...")
try:
    trainer  # Raises NameError when the trainer from Cell 7 is not in this session
    trainer_to_use = trainer
    # Ensure dataset and mappings are accessible
    dataset_to_eval = tokenized_datasets["validation"]
    id2label_eval = id2label
    NUM_LABELS_EVAL = NUM_LABELS
    print("Using existing trainer object for prediction.")
except NameError:
    print("Trainer object not found. Loading model from disk (Standard QLoRA)...")
    from transformers import AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig, Trainer, TrainingArguments
    from peft import PeftModel
    import torch

    ADAPTER_PATH_EVAL = OUTPUT_DIR  # Use OUTPUT_DIR from Cell 3
    MODEL_NAME_EVAL = MODEL_NAME  # Use MODEL_NAME from Cell 3
    # Reload tokenizer
    tokenizer_eval = AutoTokenizer.from_pretrained(ADAPTER_PATH_EVAL)
    if tokenizer_eval.pad_token is None:
        tokenizer_eval.pad_token = tokenizer_eval.eos_token
    # Reload base model with quantization config.
    # The capability query is guarded by is_available(), matching the later
    # prediction cells.
    compute_dtype_eval = (
        torch.bfloat16
        if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
        else torch.float16
    )
    bnb_config_eval = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=compute_dtype_eval, bnb_4bit_use_double_quant=True)
    base_model_eval = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME_EVAL, quantization_config=bnb_config_eval, device_map="auto",
        num_labels=NUM_LABELS, id2label=id2label, label2id=label2id
    )
    if base_model_eval.config.pad_token_id is None:
        base_model_eval.config.pad_token_id = tokenizer_eval.pad_token_id
    # Load adapter
    model_eval = PeftModel.from_pretrained(base_model_eval, ADAPTER_PATH_EVAL)
    model_eval.eval()
    print("Model reloaded from disk.")
    # Create dummy trainer for .predict().
    # TrainingArguments does not accept a `device` init argument (passing one
    # raises TypeError), so it is not passed here.
    dummy_args = TrainingArguments(output_dir="./eval_temp_std", report_to="none")
    eval_trainer = Trainer(model=model_eval, args=dummy_args, tokenizer=tokenizer_eval)
    trainer_to_use = eval_trainer
    # 'tokenized_datasets' must still be available; re-run Cell 5 if it is not.
    dataset_to_eval = tokenized_datasets["validation"]
    id2label_eval = id2label
    NUM_LABELS_EVAL = NUM_LABELS


# Get predictions
predictions_output = trainer_to_use.predict(dataset_to_eval)
y_true = predictions_output.label_ids
y_pred = np.argmax(predictions_output.predictions, axis=1)


# Generate report using label names ("1" to "10")
# Define the full range of expected label indices (0 to NUM_LABELS_EVAL - 1)
expected_labels = list(range(NUM_LABELS_EVAL))
# Ensure target_names correspond to these expected labels
target_names = [id2label_eval[i] for i in expected_labels]

# Passing `labels` makes the report list every expected class, including
# those that never occur in y_true or y_pred.
report = classification_report(
    y_true,
    y_pred,
    labels=expected_labels,
    target_names=target_names,
    digits=4,
    zero_division=0
)

# The report was previously computed but never shown.
print("\n📋 Classification Report:\n")
print(report)


# Optional Cleanup
# if 'eval_trainer' in locals(): del model_eval, tokenizer_eval, eval_trainer; gc.collect(); torch.cuda.empty_cache()


📋 Generating detailed classification report...
Using existing trainer object for prediction.


In [None]:
# ───────────────────────────────────────────────────────────
# Cell 8A: Run Prediction & Inspect Results
# ───────────────────────────────────────────────────────────
import numpy as np
import pandas as pd # Needed for unique check potentially
# Make sure necessary libraries/objects from previous cells are loaded
# (Trainer, tokenized_datasets, id2label, NUM_LABELS, etc.)
print("\n📋 Preparing for detailed classification report...")

# --- Locate the trainer and evaluation data left by earlier cells ---
try:
    trainer  # NameError here means Cell 7 has not been run in this session
    # Fill in each helper variable only when it is not already defined.
    if 'trainer_to_use' not in locals():
        trainer_to_use = trainer
    if 'dataset_to_eval' not in locals():
        dataset_to_eval = tokenized_datasets["validation"]
    if 'id2label_eval' not in locals():
        id2label_eval = id2label
    if 'NUM_LABELS_EVAL' not in locals():
        NUM_LABELS_EVAL = NUM_LABELS
    print("Using existing trainer object and data for prediction.")
except NameError:
    print("Trainer object or other necessary variables not found. Attempting to reload model...")
    # No reload logic lives in this cell; the reloading code from Cell 8
    # (ADAPTER_PATH_EVAL, MODEL_NAME_EVAL, ...) would have to be added here.
    print("Error: Cannot proceed without trainer object or reloaded model. Please ensure Cell 7 ran or add reloading code.")
    raise NameError("Trainer not found and reloading logic missing/failed.")
except Exception as e:
    print(f"Error setting up for prediction: {e}")
    raise
# --- End of trainer/data lookup ---


# --- Run the model over the validation split ---
print(f"\n⏳ Running prediction on validation set ({len(dataset_to_eval)} samples)...")
try:
    predictions_output = trainer_to_use.predict(dataset_to_eval)
    print("✅ trainer.predict() finished successfully!")
except Exception as e:
    print(f"❌ Error during trainer.predict(): {e}")
    raise  # nothing to inspect without predictions

# --- Summarise what came back ---
try:
    y_true = predictions_output.label_ids
    # Predicted class = index of the largest score along axis 1.
    y_pred = np.argmax(predictions_output.predictions, axis=1)
    print("\n--- Prediction Output Inspection ---")
    print(f"Shape of y_true (true labels): {y_true.shape}")
    print(f"Shape of y_pred (predicted labels): {y_pred.shape}")
    print(f"Unique true labels found in validation set: {np.unique(y_true)}")
    print(f"Unique predicted labels by the model: {np.unique(y_pred)}")
    print(f"Data type of y_true: {y_true.dtype}")
    print(f"Data type of y_pred: {y_pred.dtype}")
    print(f"Any NaN in y_true?: {np.isnan(y_true).any()}")
    print("------------------------------------")
    print("\n✅ Inspection complete. If shapes look correct and labels are in range [0-9], proceed to Cell 8B.")

    # y_true / y_pred stay defined at notebook level, where Cell 8B reads them.

except Exception as e:
    print(f"❌ Error during result inspection: {e}")
    raise
# --- End of inspection ---

# NOTE: The report itself is generated in the next cell (Cell 8B).


📋 Preparing for detailed classification report...
Using existing trainer object and data for prediction.

⏳ Running prediction on validation set (385 samples)...


✅ trainer.predict() finished successfully!

--- Prediction Output Inspection ---
Shape of y_true (true labels): (385,)
Shape of y_pred (predicted labels): (385,)
Unique true labels found in validation set: [2 3 4 5 6 7 8 9]
Unique predicted labels by the model: [3 5 7 8]
Data type of y_true: int64
Data type of y_pred: int64
Any NaN in y_true?: False
------------------------------------

✅ Inspection complete. If shapes look correct and labels are in range [0-9], proceed to Cell 8B.


In [None]:
# ───────────────────────────────────────────────────────────
# Cell 8B: Generate and Print Classification Report
# ───────────────────────────────────────────────────────────
import numpy as np
from sklearn.metrics import classification_report
# Make sure necessary variables exist from previous cell's execution
# (y_true, y_pred, id2label_eval, NUM_LABELS_EVAL)

print("\n⚙️ Preparing to generate classification report...")

try:
    # Touch every prerequisite up front so a missing one raises NameError
    # before any work is done.
    y_true, y_pred, id2label_eval, NUM_LABELS_EVAL

    # Every label index the report should cover: 0 .. NUM_LABELS_EVAL - 1
    expected_labels = list(range(NUM_LABELS_EVAL))
    # Display names, in the same order as expected_labels
    target_names = [id2label_eval[label_idx] for label_idx in expected_labels]

    print("⏳ Calculating classification report...")
    report = classification_report(
        y_true,
        y_pred,
        labels=expected_labels,  # list all expected labels in the report
        target_names=target_names,
        digits=4,
        zero_division=0,  # covers labels with no predictions/support
    )

    print("\n✅ Report calculation finished.")
    print("\n📋 Classification Report:\n")
    print(report)

except NameError as ne:
    print(f"❌ NameError: A required variable (y_true, y_pred, etc.) is missing: {ne}")
    print("   Please ensure Cell 8A ran successfully first.")
except Exception as e:
    print(f"❌ Error during classification_report generation or printing: {e}")
    import traceback
    traceback.print_exc()  # full traceback for unexpected failures


⚙️ Preparing to generate classification report...
⏳ Calculating classification report...

✅ Report calculation finished.

📋 Classification Report:

              precision    recall  f1-score   support

           1     0.0000    0.0000    0.0000         0
           2     0.0000    0.0000    0.0000         0
           3     0.0000    0.0000    0.0000        14
           4     0.2570    0.4742    0.3333        97
           5     0.0000    0.0000    0.0000         7
           6     0.2019    0.2258    0.2132        93
           7     0.0000    0.0000    0.0000         5
           8     0.2222    0.1875    0.2034        32
           9     0.2667    0.1961    0.2260       102
          10     0.0000    0.0000    0.0000        35

   micro avg     0.2416    0.2416    0.2416       385
   macro avg     0.0948    0.1084    0.0976       385
weighted avg     0.2026    0.2416    0.2123       385



In [None]:
# ───────────────────────────────────────────────────────────
# FIRST TRY
# Cell TEST: Predict on Test Set and Inspect Results
# ───────────────────────────────────────────────────────────
import numpy as np
import pandas as pd # Often useful for inspecting/saving predictions
from datasets import Dataset # Needed if test_dataset isn't directly available

print("\n📋 Running prediction on the Test Set...")

# --- Logic to find or reload trainer and data ---
try:
    trainer  # Raises NameError when the trainer from Cell 7 is not in this session
    # Define each helper variable only when it is not already present
    if 'trainer_to_use' not in locals():
        trainer_to_use = trainer
    # Use the 'test' split from the tokenized_datasets
    if 'dataset_to_predict' not in locals():
        dataset_to_predict = tokenized_datasets["test"]
    if 'id2label_pred' not in locals():
        id2label_pred = id2label  # Use id2label from Cell 3/9
    if 'NUM_LABELS_PRED' not in locals():
        NUM_LABELS_PRED = NUM_LABELS  # Use NUM_LABELS from Cell 3/9
    print("Using existing trainer object and 'test' data for prediction.")
except NameError:
    print("Trainer object or other necessary variables not found. Attempting to reload model for prediction...")
    # --- Reload Model and Tokenizer (Similar to Cell 8 for evaluation report) ---
    from transformers import AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig, Trainer, TrainingArguments
    from peft import PeftModel
    import torch

    ADAPTER_PATH_PRED = OUTPUT_DIR  # Use OUTPUT_DIR from Cell 3
    MODEL_NAME_PRED = MODEL_NAME  # Use MODEL_NAME from Cell 3
    # The test split comes from tokenized_datasets; without it there is nothing to predict on.
    if 'tokenized_datasets' not in locals():
        print("❌ Error: tokenized_datasets not found. Cannot load test set.")
        raise NameError("tokenized_datasets is missing.")

    # Reload tokenizer
    tokenizer_pred = AutoTokenizer.from_pretrained(ADAPTER_PATH_PRED)
    if tokenizer_pred.pad_token is None:
        tokenizer_pred.pad_token = tokenizer_pred.eos_token
    # Reload base model with quantization config
    compute_dtype_pred = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
    bnb_config_pred = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=compute_dtype_pred, bnb_4bit_use_double_quant=True)
    base_model_pred = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME_PRED, quantization_config=bnb_config_pred, device_map="auto",
        num_labels=NUM_LABELS, id2label=id2label, label2id=label2id  # Use NUM_LABELS, id2label, label2id from Cell 3
    )
    if base_model_pred.config.pad_token_id is None:
        base_model_pred.config.pad_token_id = tokenizer_pred.pad_token_id
    # Load adapter
    model_pred = PeftModel.from_pretrained(base_model_pred, ADAPTER_PATH_PRED)
    model_pred.eval()
    print("Model reloaded from disk for prediction.")
    # Create dummy trainer for .predict().
    # TrainingArguments does not accept a `device` init argument (passing one
    # raises TypeError), so it is not passed here.
    dummy_args_pred = TrainingArguments(output_dir="./pred_temp_std", report_to="none")
    pred_trainer = Trainer(model=model_pred, args=dummy_args_pred, tokenizer=tokenizer_pred)
    trainer_to_use = pred_trainer
    # Use the 'test' split
    dataset_to_predict = tokenized_datasets["test"]
    id2label_pred = id2label  # Use id2label from Cell 3/9
    NUM_LABELS_PRED = NUM_LABELS  # Use NUM_LABELS from Cell 3/9


except Exception as e:
    print(f"❌ Error setting up for test set prediction: {e}")
    raise  # Stop if setup fails
# --- End finding trainer/data ---


# --- Get Predictions on the Test Set ---
print(f"\n⏳ Running prediction on test set ({len(dataset_to_predict)} samples)...")
try:
    # Use the 'test' split here
    predictions_output_test = trainer_to_use.predict(dataset_to_predict)
    print("✅ trainer.predict() finished successfully on test set!")
except Exception as e:
    print(f"❌ Error during trainer.predict() on test set: {e}")
    raise  # Stop if prediction fails

# --- Extract Predicted Labels ---
try:
    # The model scores are in predictions_output_test.predictions
    test_predictions_logits = predictions_output_test.predictions
    # Predicted class id = index of the largest score along axis 1
    test_predicted_class_ids = np.argmax(test_predictions_logits, axis=1)

    # True labels as class ids; handled below in case none were returned.
    test_true_labels = predictions_output_test.label_ids

    # Convert predicted class ids to severity values via id2label_pred
    test_predicted_severity_values = [id2label_pred.get(class_id, "Unknown") for class_id in test_predicted_class_ids]

    print("\n--- Test Set Prediction Results Summary ---")
    print(f"Number of samples predicted: {len(test_predicted_severity_values)}")

    # Show the first few predictions
    print("First 10 predicted severity values:", test_predicted_severity_values[:10])
    # Map the true class ids through the same id2label mapping so both lines
    # are on the same scale; previously raw class ids were printed next to
    # predicted severity values.
    if test_true_labels is not None:
        test_true_severity_values = [id2label_pred.get(class_id, "Unknown") for class_id in test_true_labels]
        print("First 10 true severity values (if available):", test_true_severity_values[:10])
    else:
        test_true_severity_values = None
        print("First 10 true severity values (if available):", "not available")

    # test_predicted_severity_values / test_true_severity_values hold severity
    # labels; test_true_labels and test_predicted_class_ids hold class ids.

    print("\n✅ Test set prediction complete. Predicted severity values are stored in 'test_predicted_severity_values'.")

    # The original text is not part of the predict() output; join back to the
    # raw test data if it is needed alongside the predictions.

except Exception as e:
    print(f"❌ Error processing test set prediction results: {e}")
    import traceback
    traceback.print_exc()  # Print detailed traceback
# --- End Prediction Extraction ---

# Optional Cleanup (only applies when the model was reloaded in this cell)
if 'pred_trainer' in locals():
    # gc is not imported elsewhere in this cell, and the reload path runs
    # precisely when Cell 7 (which imports gc) has not been executed.
    import gc
    # NOTE: trainer_to_use and base_model_pred still reference the model, so
    # its memory is only reclaimable once those names are dropped as well.
    del model_pred, tokenizer_pred, pred_trainer
    gc.collect()
    torch.cuda.empty_cache()


📋 Running prediction on the Test Set...
Using existing trainer object and 'test' data for prediction.

⏳ Running prediction on test set (385 samples)...


✅ trainer.predict() finished successfully on test set!

--- Test Set Prediction Results Summary ---
Number of samples predicted: 385
First 10 predicted severity values: ['9', '6', '4', '4', '6', '4', '9', '9', '9', '6']
First 10 true severity values (if available): [8 8 9 3 7 3 5 9 3 8]

✅ Test set prediction complete. Predicted severity values are stored in 'test_predicted_severity_values'.


In [None]:
# ───────────────────────────────────────────────────────────
# Cell TEST: Predict on Test Set and Inspect Results
# ───────────────────────────────────────────────────────────
import numpy as np
import pandas as pd # Often useful for inspecting/saving predictions
from datasets import Dataset # Needed if test_dataset isn't directly available
from sklearn.metrics import accuracy_score # Import accuracy_score

print("\n📋 Running prediction on the Test Set...")

# --- Logic to find or reload trainer and data ---
try:
    trainer  # Raises NameError when the trainer from Cell 7 is not in this session
    # Define each helper variable only when it is not already present
    if 'trainer_to_use' not in locals():
        trainer_to_use = trainer
    # Use the 'test' split from the tokenized_datasets
    if 'dataset_to_predict' not in locals():
        dataset_to_predict = tokenized_datasets["test"]
    if 'id2label_pred' not in locals():
        id2label_pred = id2label  # Use id2label from Cell 3/9
    if 'NUM_LABELS_PRED' not in locals():
        NUM_LABELS_PRED = NUM_LABELS  # Use NUM_LABELS from Cell 3/9
    print("Using existing trainer object and 'test' data for prediction.")
except NameError:
    print("Trainer object or other necessary variables not found. Attempting to reload model for prediction...")
    # --- Reload Model and Tokenizer (Similar to Cell 8 for evaluation report) ---
    from transformers import AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig, Trainer, TrainingArguments
    from peft import PeftModel
    import torch

    ADAPTER_PATH_PRED = OUTPUT_DIR  # Use OUTPUT_DIR from Cell 3
    MODEL_NAME_PRED = MODEL_NAME  # Use MODEL_NAME from Cell 3
    # The test split comes from tokenized_datasets; without it there is nothing to predict on.
    if 'tokenized_datasets' not in locals():
        print("❌ Error: tokenized_datasets not found. Cannot load test set.")
        raise NameError("tokenized_datasets is missing.")

    # Reload tokenizer
    tokenizer_pred = AutoTokenizer.from_pretrained(ADAPTER_PATH_PRED)
    if tokenizer_pred.pad_token is None:
        tokenizer_pred.pad_token = tokenizer_pred.eos_token
    # Reload base model with quantization config
    compute_dtype_pred = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
    bnb_config_pred = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=compute_dtype_pred, bnb_4bit_use_double_quant=True)
    base_model_pred = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME_PRED, quantization_config=bnb_config_pred, device_map="auto",
        num_labels=NUM_LABELS, id2label=id2label, label2id=label2id  # Use NUM_LABELS, id2label, label2id from Cell 3
    )
    if base_model_pred.config.pad_token_id is None:
        base_model_pred.config.pad_token_id = tokenizer_pred.pad_token_id
    # Load adapter
    model_pred = PeftModel.from_pretrained(base_model_pred, ADAPTER_PATH_PRED)
    model_pred.eval()
    print("Model reloaded from disk for prediction.")
    # Create dummy trainer for .predict().
    # TrainingArguments does not accept a `device` init argument (passing one
    # raises TypeError), so it is not passed here.
    dummy_args_pred = TrainingArguments(output_dir="./pred_temp_std", report_to="none")
    pred_trainer = Trainer(model=model_pred, args=dummy_args_pred, tokenizer=tokenizer_pred)
    trainer_to_use = pred_trainer
    # Use the 'test' split
    dataset_to_predict = tokenized_datasets["test"]
    id2label_pred = id2label  # Use id2label from Cell 3/9
    NUM_LABELS_PRED = NUM_LABELS  # Use NUM_LABELS from Cell 3/9


except Exception as e:
    print(f"❌ Error setting up for test set prediction: {e}")
    raise  # Stop if setup fails
# --- End finding trainer/data ---


# --- Get Predictions on the Test Set ---
print(f"\n⏳ Running prediction on test set ({len(dataset_to_predict)} samples)...")
try:
    # Use the 'test' split here
    predictions_output_test = trainer_to_use.predict(dataset_to_predict)
    print("✅ trainer.predict() finished successfully on test set!")
except Exception as e:
    print(f"❌ Error during trainer.predict() on test set: {e}")
    raise  # Stop if prediction fails

# --- Extract Predicted Labels ---
try:
    # Predicted class id = index of the largest score along axis 1
    y_pred_test = np.argmax(predictions_output_test.predictions, axis=1)
    y_true_test = predictions_output_test.label_ids

    print("\n--- Test Set Prediction Results ---")
    print(f"Total samples in test set: {len(y_true_test)}")

    # Count the samples whose predicted class id equals the true class id
    correct_predictions_count = np.sum(y_true_test == y_pred_test)
    print(f"Number of correctly predicted samples: {correct_predictions_count}")

    # Calculate and print accuracy
    test_accuracy = accuracy_score(y_true_test, y_pred_test)
    print(f"Accuracy on the test set: {test_accuracy:.4f}")

    print("-----------------------------------")

except Exception as e:
    print(f"❌ Error extracting or analyzing test set predictions: {e}")
    raise
# --- End Inspection ---

# Optional Cleanup (only applies when the model was reloaded in this cell)
if 'pred_trainer' in locals():
    # gc is not imported elsewhere in this cell, and the reload path runs
    # precisely when Cell 7 (which imports gc) has not been executed.
    import gc
    # NOTE: trainer_to_use and base_model_pred still reference the model, so
    # its memory is only reclaimable once those names are dropped as well.
    del model_pred, tokenizer_pred, pred_trainer
    gc.collect()
    torch.cuda.empty_cache()


📋 Running prediction on the Test Set...
Using existing trainer object and 'test' data for prediction.

⏳ Running prediction on test set (385 samples)...


✅ trainer.predict() finished successfully on test set!

--- Test Set Prediction Results ---
Total samples in test set: 385
Number of correctly predicted samples: 91
Accuracy on the test set: 0.2364
-----------------------------------


In [None]:
# ───────────────────────────────────────────────────────────
# Cell 9: Manual Prediction Function (Standard QLoRA for Llama 3.1)
# ───────────────────────────────────────────────────────────
from transformers import AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig
from peft import PeftModel
import torch
import pandas as pd
import numpy as np
# import re # Not needed for classification output

# --- Configuration ---
MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct"
ADAPTER_PATH = "fmea_severity_classifier_llama31_8b_standard_qlora"  # <<< Confirm this is the path saved by Cell 7
MAX_SEQ_LENGTH = 512

# Input column names; they must be spelled EXACTLY as they were for training (Cell 4).
COL_SUBFUNCTION = "Subfunction"
COL_REQUIREMENTS = "Requirements"
COL_FAILURE_MODE = "Potential Failure Mode and descriptions"
COL_EFFECT_PRIMARY = "Potential Effect(s) of Failure (primary)"
COL_EFFECT_SECONDARY = "Potential Effect(s) of Failure (secondary)"
INPUT_COLS_MANUAL = [
    COL_SUBFUNCTION,
    COL_REQUIREMENTS,
    COL_FAILURE_MODE,
    COL_EFFECT_PRIMARY,
    COL_EFFECT_SECONDARY,
]

NUM_LABELS = 10
# Class id k (0-based) maps to the severity string str(k + 1): 0 -> "1", ..., 9 -> "10".
id2label = dict(enumerate(map(str, range(1, NUM_LABELS + 1))))
# --- End Configuration ---


# --- Load the fine-tuned model and its tokenizer ---
# Everything below has to succeed before the prediction function can be used.
print("⏳ Loading fine-tuned Llama 3.1 model for manual prediction (Standard QLoRA)...")
try:
    # 4-bit NF4 quantization settings; bfloat16 compute when the CUDA device
    # reports compute capability major version >= 8, float16 otherwise.
    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
        compute_dtype_pred = torch.bfloat16
    else:
        compute_dtype_pred = torch.float16
    bnb_config_pred = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype_pred,
        bnb_4bit_use_double_quant=True,
    )

    # Quantized base model with a classification head sized for NUM_LABELS
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        quantization_config=bnb_config_pred,
        device_map="auto",  # alternatively {"": 0}
        num_labels=NUM_LABELS,
        id2label=id2label,
        label2id={label_name: class_id for class_id, label_name in id2label.items()},
        # token = "hf_..." # Add if login via notebook_login() didn't persist
    )

    # Tokenizer files are read from the adapter directory
    tokenizer = AutoTokenizer.from_pretrained(ADAPTER_PATH)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    if model.config.pad_token_id is None:
        model.config.pad_token_id = tokenizer.pad_token_id

    # Attach the LoRA adapter to the base model and switch to eval mode
    print(f"   Applying LoRA adapter from {ADAPTER_PATH}...")
    model = PeftModel.from_pretrained(model, ADAPTER_PATH)
    model.eval()
    device = model.device
    print(f"✅ Model and tokenizer loaded on device: {device}")

except Exception as e:
    print(f"❌ Error loading model/adapter: {e}")
    raise
# --- End Model Loading ---


# --- Define Prediction Function ---
def predict_fmea_severity_final(**kwargs):
    """Predict the FMEA Severity class (1-10) for one manually entered record.

    Keyword arguments are looked up by the column names listed in
    ``INPUT_COLS_MANUAL``. Those names contain spaces and symbols, so callers
    pass them as ``**{COLUMN_NAME: value, ...}``. A column that is absent, or
    whose value is None/NaN, contributes an empty value to the model input; a
    warning is printed for such columns and for keys that are not in
    ``INPUT_COLS_MANUAL``, so a misspelled key does not go unnoticed.

    Relies on the module-level ``tokenizer``, ``model``, ``device``,
    ``MAX_SEQ_LENGTH`` and ``id2label`` objects.

    Returns:
        The predicted severity label taken from ``id2label`` (a string), or
        ``"Unknown"`` when the predicted class id has no entry in the mapping.
    """
    # Keys outside INPUT_COLS_MANUAL are never read; without this warning a
    # typo in a key would silently become an empty field in the prompt.
    unexpected_args = [key for key in kwargs if key not in INPUT_COLS_MANUAL]
    if unexpected_args:
        print(f"⚠️ Ignoring unexpected input keys: {unexpected_args}")

    # Build the input text string, one "<label>: <value>" line per column.
    text_parts = []
    missing_args = []
    for col in INPUT_COLS_MANUAL:
        value = kwargs.get(col)
        if pd.notna(value):
            value = str(value)
        else:
            missing_args.append(col)
            value = ""
        # The field label is the column name up to its first "(".
        # NOTE(review): the primary and secondary effect columns both reduce
        # to the same label "Potential Effect". Left unchanged because the
        # prompt format presumably has to match the one used in training —
        # TODO confirm.
        clean_col_name = col.split('(')[0].strip()
        text_parts.append(f"{clean_col_name}: {value}")
    if missing_args:
        print(f"⚠️ Missing input values (left empty): {missing_args}")

    combined_text = "\n".join(text_parts)
    print(f"--- Input Text for Model ---\n{combined_text}\n--------------------------")

    # Tokenize as a batch of one and move the tensors to the model's device.
    inputs = tokenizer(
        [combined_text],
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=MAX_SEQ_LENGTH,
    ).to(device)

    # Predict without tracking gradients.
    print("⏳ Predicting severity...")
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
    predicted_class_id = torch.argmax(logits, dim=-1).item()
    # Map the class index back to its severity label.
    predicted_severity = id2label.get(predicted_class_id, "Unknown")

    print(f"✅ Predicted Severity (1-10): {predicted_severity}")
    return predicted_severity

# --- Example Usage (Using User Provided Scenarios) --- # <<< modified section begins >>>
print("\n--- Manual Prediction Examples (User Provided) ---")

# Each scenario is a dict keyed by the COL_* column-name constants. The column
# names contain spaces and symbols, so the dict is unpacked with ** instead of
# being written as ordinary keyword arguments.

# Example 1: Emergency Maneuvers
print("--- Predicting User Example 1 ---")
user_example_1_inputs = {
    COL_SUBFUNCTION: "Emergency Maneuvers",
    COL_REQUIREMENTS: "Manage safe operations by reacting to sudden braking or lane changes by other vehicles or objects",
    COL_FAILURE_MODE: "No Function (The autonomous truck fails to detect or react appropriately [brake, steer] to sudden braking, lane changes by other vehicles, or objects appearing in the path, thereby failing to manage safe operations during emergency scenarios.)",
    COL_EFFECT_PRIMARY: "AV fails to apply required emergency braking",
    COL_EFFECT_SECONDARY: "results in traffic citation",
}
pred_user_1 = predict_fmea_severity_final(**user_example_1_inputs)
print(f"Predicted Severity for User Example 1: {pred_user_1}\n")

# Example 2: Move For Disabled/Stopped Vehicles
print("--- Predicting User Example 2 ---")
user_example_2_inputs = {
    COL_SUBFUNCTION: "Move For Disabled/Stopped Vehicles",
    COL_REQUIREMENTS: "Manage safe operations by operating appropriately to disabled or emergency vehicles that are stationary or stopped on the road or on the shoulder.",
    COL_FAILURE_MODE: "No Function (The autonomous truck fails to detect a stationary disabled/emergency vehicle or fails to execute required safe operations like reducing speed, changing lanes [moving over], or providing adequate lateral clearance, thereby failing to manage safe operations.)",
    COL_EFFECT_PRIMARY: "AV fails to reduce speed when approaching stationary vehicle/personnel",
    COL_EFFECT_SECONDARY: "results in traffic citation",
}
pred_user_2 = predict_fmea_severity_final(**user_example_2_inputs)
print(f"Predicted Severity for User Example 2: {pred_user_2}\n")

# --- End Example Usage --- # <<< modified section ends >>>

⏳ Loading fine-tuned Llama 3.1 model for manual prediction (Standard QLoRA)...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Meta-Llama-3.1-8B-Instruct and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


   Applying LoRA adapter from fmea_severity_classifier_llama31_8b_standard_qlora...
✅ Model and tokenizer loaded on device: cuda:0

--- Manual Prediction Examples (User Provided) ---
--- Predicting User Example 1 ---
--- Input Text for Model ---
Subfunction: Emergency Maneuvers
Requirements: Manage safe operations by reacting to sudden braking or lane changes by other vehicles or objects
Potential Failure Mode and descriptions: No Function (The autonomous truck fails to detect or react appropriately [brake, steer] to sudden braking, lane changes by other vehicles, or objects appearing in the path, thereby failing to manage safe operations during emergency scenarios.)
Potential Effect: AV fails to apply required emergency braking
Potential Effect: results in traffic citation
--------------------------
⏳ Predicting severity...
✅ Predicted Severity (1-10): 4
Predicted Severity for User Example 1: 4

--- Predicting User Example 2 ---
--- Input Text for Model ---
Subfunction: Move For Disab