<a href="https://colab.research.google.com/github/TheTigerHub/UTD-Summer-2025/blob/main/UTD2025_FMEA_Severity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
print ("installing hugging face libraries for QLoRA")

!pip install -q accelerate==0.30.1 #hardware acceleration
print ("installed accelerate")
!pip install -q peft==0.11.1 #peft is parameter effecient fine tuning which allows LoRA and QLoRA
print ("installed peft")
!pip install -q bitsandbytes #bits and bytes helps with quantization
print("installed bitsandbytes")
!pip install -q transformers==4.41.1 #hugging face library for pre-trained models, tokenizers
print("installed transformers")
!pip install -q datasets==2.19.0 #datasets is used for loading, processing, and managing datasets
print("installed datasets")
!pip install -q scikit-learn==1.4.2 # scikit-learn for data splitting
print("installed scikit-learn")
!pip install -q pandas #used for data manipulation and analysis
print("installed pandas")
!pip install -q openpyxl #read excel
print("installed openpyxl")

print ("\n libraries installed")

installing hugging face libraries for QLoRA
installed accelerate
installed peft
installed bitsandbytes
installed transformers
installed datasets
installed scikit-learn
installed pandas
installed openpyxl

 libraries installed


In [3]:
print ("installing hugging face libraries for QLoRA")

!pip install -q "peft[bnb]" --upgrade #installs peft pacakge with bnb extra, integrates peft and bnb

!pip install -q accelerate bitsandbytes "transformers[torch]" --upgrade #installs transformers with dependencies for PyTorch

!pip install -q datasets==2.19.0 scikit-learn==1.4.2 pandas openpyxl #installs datasets, scikit-learn, pandas, and openpyxl

print ("\n libraries installed")
print ("\n restart runtime")

installing hugging face libraries for QLoRA
[0m
 libraries installed

 restart runtime


In [1]:
#imports libraries and configures data and training

import torch #torch is necessary for the Llama model
import pandas as pd #will be used later to load, clean, and process data before converting to a hugginf face dataset
import numpy as np #library for large, multi dimensional arrays and matricies as well as operations on them
from datasets import Dataset, DatasetDict #provides effecient way to handle large datasets for machine learning, espicially natural language processing
from sklearn.model_selection import train_test_split #from sci-kit learn library, splits dataset into training and validation
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report #these provide information on the accuracy of the model
from transformers import ( #key components from transformers
    AutoTokenizer, #loads correct tokenize automatically, tokenizers convert text into numerical IDs
    AutoModelForSequenceClassification, #standard class the automatically load the correct model for sequence classification tasks
    TrainingArguments, #class to configure training
    Trainer, #class that simplifies training and eval loop for hugging face models
    DataCollatorWithPadding, #pads sequences of data in a batch to be the same legnth
    BitsAndBytesConfig # manual QLoRA config, quantization is important bc of GPU resources
)
from peft import (
    get_peft_model, #wraps model with peft adapter like QLoRA
    LoraConfig, #class for LoRA config
    TaskType, #specifies task type
    prepare_model_for_kbit_training # manual QLoRA setup with kbit quantization
)

import warnings
warnings.filterwarnings("ignore") #makes output cleaner
from huggingface_hub import notebook_login # Keep login for Llama 3.1

CSV_PATH = "2.4.xlsx" #uploaded excel file

# Column Names (Ensure these EXACTLY match your cleaned CSV/Excel headers)
COL_SUBFUNCTION = "Subfunction"
COL_REQUIREMENTS = "Requirements"
COL_FAILURE_MODE = "Potential Failure Mode and descriptions" # Base name, will be cleane
COL_EFFECT_PRIMARY = "Potential Effect(s) of Failure (primary)" # Base name
COL_EFFECT_SECONDARY = "Potential Effect(s) of Failure (secondary)" # Base name
COL_SEVERITY = "Severity" # target column

# Input/Output Columns
INPUT_TEXT_COLS = [
    COL_SUBFUNCTION, COL_REQUIREMENTS, COL_FAILURE_MODE,
    COL_EFFECT_PRIMARY, COL_EFFECT_SECONDARY
]


#need to fill in empty cells
COLS_TO_FORWARD_FILL = [
    COL_SUBFUNCTION, COL_REQUIREMENTS, COL_FAILURE_MODE
]

#want to predict this
TARGET_COLUMN = COL_SEVERITY
NUM_LABELS = 10 # Severity can only be 1-10

# Model Config
MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct" # using 3.2-1B bc 3.1 access has not been approved
MAX_SEQ_LENGTH = 512 #for memory management

# Training Config
OUTPUT_DIR = "fmea_severity_classifier_llama31_8b_standard_qlora" # <<< New output dir name
LEARNING_RATE = 1e-4      # common QLoRA starting point
NUM_EPOCHS = 3            # Train for 3 epochs which is standard for fine tuning
# MAX_STEPS = 500         # Alternative to epochs
BATCH_SIZE_PER_DEVICE = 1 # may use 4 because model is smaller
GRAD_ACCUMULATION_STEPS = 16 # effective batch size of 16
LORA_R = 16 #the rank for LoRA matricies 16 is common
LORA_ALPHA = 32 #scaling factor for LoRA updates twice or equal to LORA_R is standard
LORA_DROPOUT = 0.05 #used for regularization, 0.05 is typical value
LOGGING_STEPS = 10 #frequent updates
SAVE_STRATEGY = "epoch" #when model should be saved
EVAL_STRATEGY = "epoch"   #when eval should be preformed
# -----done configing LoRA------


#setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") #checks if device is uisng cuda GPU, cpu for backup
print(f"Using device: {device}") #prints out what device is being used
if device.type == 'cpu': print("Warning: Running on CPU!") #warning for if using cpu bc cpu training is way too slow

# Setup Label Mappings
labels_list = [str(i) for i in range(1, 11)] #creates list of strings for severity label from "1" to "10"
id2label = {i: label for i, label in enumerate(labels_list)} #maps integers 0-9 to strings "1"-"10" needed for classification head and output interpretation
label2id = {label: i for i, label in enumerate(labels_list)} #reverse of previous line, maps integers "1"-"10" to integers 0-9 which is required for training
print(f"id2label mapping: {id2label}") #print for verification
print(f"label2id mapping: {label2id}") #print for verification

# Check GPU capability for compute dtype in BNBConfig will  use float16 because t4 is the free avalible one
compute_dtype = torch.float16
if torch.cuda.is_available():
    if torch.cuda.get_device_capability()[0] >= 8: # Ampere+ (A100)
        compute_dtype = torch.bfloat16
        print("Compute dtype set to bfloat16 for Ampere+ GPU.")
    else:
        print("Compute dtype set to float16.")

Using device: cuda
id2label mapping: {0: '1', 1: '2', 2: '3', 3: '4', 4: '5', 5: '6', 6: '7', 7: '8', 8: '9', 9: '10'}
label2id mapping: {'1': 0, '2': 1, '3': 2, '4': 3, '5': 4, '6': 5, '7': 6, '8': 7, '9': 8, '10': 9}
Compute dtype set to float16.


In [3]:
#load and Preprocess Data
#read Excel/CSV, clean header, forward filling, combine text features into a combined "text" column, and "label" column 0-9
#splits data
#converts to DatasetDict
#normalization off by default

print(f"Loading data from '{CSV_PATH}'...")


#try except block handles reading from excel and csv, and then stores it into pandas dataframe, df
try:
    try: df = pd.read_excel(CSV_PATH)
    except Exception: df = pd.read_csv(CSV_PATH)
    #this line will perform string operations to datafram column names, removes new line characters with \n, replaces multiple spaces with a single space
    #removes leading and trailing whitespace
    #saves the processed columns to cleaned_columns list
    original_columns = df.columns.tolist(); df.columns = df.columns.str.replace('\n', '', regex=False).str.replace(' +', ' ', regex=True).str.strip(); cleaned_columns = df.columns.tolist()
    #maps cleaned columns to original columns
    column_map = {clean: orig for clean, orig in zip(cleaned_columns, original_columns)}; print(f"✅ Loaded {len(df)} rows. Cleaned columns: {cleaned_columns}")
except Exception as e: print(f"❌ Error loading data: {e}"); raise #print error if reading from excel or csv produced an exception


# Function to get cleaned name robustly (optional, can hardcode if sure)
def get_cleaned_name(config_name, df_cols, original_map): # Pass original map too
    # Use split() and join() to collapse multiple spaces and remove newlines/strip
    cleaned = ' '.join(str(config_name).replace('\n', '').strip().split()) # same cleaning as before
    if cleaned not in df_cols:
         original_name = original_map.get(cleaned, config_name) # Try lookup original name if clean fails
         print(f"   Warning: Configured column '{config_name}' -> '{cleaned}' not found after cleaning. Check CSV/Excel headers and config variables.")
         # Fallback to original name might be safer if cleaning leads to mismatch
         if original_name in df_cols: return original_name
         return config_name # Return original config if neither found
    return cleaned

# Update configured names based on cleaned names IN THE DATAFRAME
# This has all parts of the FMEA table, and uses the funciton to get cleaned names
# ceratin cells to forward fill bc they are partialy empty in table
COL_SUBFUNCTION = get_cleaned_name(COL_SUBFUNCTION, df.columns, column_map)
COL_REQUIREMENTS = get_cleaned_name(COL_REQUIREMENTS, df.columns, column_map)
COL_FAILURE_MODE = get_cleaned_name(COL_FAILURE_MODE, df.columns, column_map)
COL_EFFECT_PRIMARY = get_cleaned_name(COL_EFFECT_PRIMARY, df.columns, column_map)
#COL_EFFECT_SECONDARY = get_cleaned_name(COL_EFFECT_SECONDARY, df.columns, column_map)
COL_SEVERITY = get_cleaned_name(COL_SEVERITY, df.columns, column_map)
INPUT_TEXT_COLS = [COL_SUBFUNCTION, COL_REQUIREMENTS, COL_FAILURE_MODE, COL_EFFECT_PRIMARY] #COL_EFFECT_SECONDARY
COLS_TO_FORWARD_FILL = [COL_SUBFUNCTION, COL_REQUIREMENTS, COL_FAILURE_MODE]
TARGET_COLUMN = COL_SEVERITY # Already potentially cleaned
all_needed_columns = INPUT_TEXT_COLS + [TARGET_COLUMN]
print(f"   Using effective columns: {all_needed_columns}")

# Verify Columns Exist
missing_cols = [col for col in all_needed_columns if col not in df.columns]; #list of columns in all_needed_columns not in df.columns, which is all columns
if missing_cols: print(f"❌ Error: Columns missing: {missing_cols}"); raise ValueError("Missing columns") #print out result if missing columns is not empty, and raises error

# Preprocessing
print("⏳ Preprocessing data...")
df_selected = df[all_needed_columns].copy() #only takes needed columns
print(f"   Forward filling columns: {COLS_TO_FORWARD_FILL}...")
df_selected[COLS_TO_FORWARD_FILL] = df_selected[COLS_TO_FORWARD_FILL].ffill() #forward fills the columns that need it
initial_rows = len(df_selected); df_selected = df_selected.dropna(); final_rows = len(df_selected) #count initial rows with len(), then drops rows empty in any column, counts rows after dropping
if initial_rows > final_rows: print(f"   Dropped {initial_rows - final_rows} rows with NaN values.") #prints if rows were dropped
if final_rows == 0: raise ValueError("No data left after NaN drop") #if everything was dropped, raise an Error

# Convert Severity & Validate
try:
    df_selected[TARGET_COLUMN] = pd.to_numeric(df_selected[TARGET_COLUMN], errors='coerce') #converts values in TARGET_COLUMN into a number type, if can't turn into NaN
    df_selected = df_selected.dropna(subset=[TARGET_COLUMN]); df_selected[TARGET_COLUMN] = df_selected[TARGET_COLUMN].astype(int) #removes rows if converted to NaN
except Exception as e: print(f"❌ Error converting Severity: {e}"); raise
#filter out data not in 1-10 range and checks if any data remains
initial_rows = len(df_selected); df_selected = df_selected[df_selected[TARGET_COLUMN].between(1, 10)]; final_rows = len(df_selected)
#sets df_selected to only have rows with value 1-10, then counts the new ammount of rows
if initial_rows > final_rows: print(f"   Removed {initial_rows - final_rows} rows with Severity outside [1, 10].") #prints out how many rows were removed
if final_rows == 0: raise ValueError("No data left with valid Severity (1-10)")

# Combine Text Features
def combine_features(row):
    text_parts = [] #empty list to store the text
    for col in INPUT_TEXT_COLS: value = str(row[col]) if pd.notna(row[col]) else ""; clean_col_name = col.split('(')[0].strip(); text_parts.append(f"{clean_col_name}: {value}")
    #goes through every input column and gets the value if it is not NaN, splits at ( and removes leading and trailing whitespace then adds it to text_parts
    return "\n".join(text_parts) #returns all text parts together with new line between them as a input for the model
print("   Combining input text features into 'text' column...")
df_selected['text'] = df_selected.apply(combine_features, axis=1) #creates new column called text, fills it with the combined features in each row

# Prepare Labels (0-9)
df_selected['label'] = df_selected[TARGET_COLUMN] - 1 #converts from 1-10 to 0-9 because 0 is the first and populates new column label with those values
print(f"   Created 'label' column (0-9) from '{TARGET_COLUMN}'.")

# Keep only necessary columns
df_final = df_selected[['text', 'label']] #just needs the combined text features and the 0-9 severity value

# Create Train/Validation Split
print("⏳ Splitting data...")
train_df, valid_df = train_test_split(df_final, test_size=0.2, random_state=42, stratify=df_final['label']) #80% for training, 20% for validation severity makes sure that severity levels are evenly distributed
print(f"✅ Split complete. Train size: {len(train_df)}, Validation size: {len(valid_df)}") #prints the number of rows for training and validating

# Convert to Hugging Face Datasets
train_dataset = Dataset.from_pandas(train_df, preserve_index=False)
valid_dataset = Dataset.from_pandas(valid_df, preserve_index=False)
raw_datasets = DatasetDict({'train': train_dataset, 'validation': valid_dataset}) #create DatasetDict with train and validation and corresponding datasets which is needed for Trainer
print("✅ Data prepared and converted to Hugging Face Datasets format.")
print(raw_datasets)

#  cleanup
import gc; del df, df_selected, df_final, train_df, valid_df; gc.collect() #imports garbage collector, deletes unneeded large pandas dataframes for memory saving, and runs garbage collector for memory

Loading data from '2.4.xlsx'...
✅ Loaded 1923 rows. Cleaned columns: ['Subfunction', 'Requirements', 'Potential Failure Mode and descriptions', 'Potential Effect(s) of Failure (primary)', 'Standardised Statement', 'Severity', 'Confidence']
   Using effective columns: ['Subfunction', 'Requirements', 'Potential Failure Mode and descriptions', 'Potential Effect(s) of Failure (primary)', 'Severity']
⏳ Preprocessing data...
   Forward filling columns: ['Subfunction', 'Requirements', 'Potential Failure Mode and descriptions']...
   Combining input text features into 'text' column...
   Created 'label' column (0-9) from 'Severity'.
⏳ Splitting data...
✅ Split complete. Train size: 1538, Validation size: 385
✅ Data prepared and converted to Hugging Face Datasets format.
DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1538
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 385
    })
})


212

In [10]:
# --- Hugging Face Login---
print("\nPlease log in to Hugging Face using an Access Token with 'read' permission.")
notebook_login()
print("✅ Login process initiated.")
# --- End Login ---
print(f"\n⏳ Loading tokenizer for '{MODEL_NAME}'...")

# Load tokenizer associated with the base model
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) # Login above handles token
    # Set padding token (Llama 3 uses EOS)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        print(f"   Tokenizer pad_token set to eos_token: {tokenizer.pad_token}")
    print("✅ Tokenizer loaded.")
except Exception as e: print(f"❌ Error loading tokenizer: {e}"); raise

# Load tokenizer associated with the base model
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) # Login above handles token
    # Set padding token (Llama 3 uses EOS)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token #is padding token isnt defined sets the pad token to be same as eos token
        print(f"   Tokenizer pad_token set to eos_token: {tokenizer.pad_token}")
    print("✅ Tokenizer loaded.")
except Exception as e: print(f"❌ Error loading tokenizer: {e}"); raise

# Define tokenization function
def tokenize_function(examples):
    #this tokenizes the "text" column and truncation=true tells it to cut off the at the assigned legnth
    return tokenizer(examples["text"], truncation=True, max_length=MAX_SEQ_LENGTH, padding=False)

print("⏳ Tokenizing datasets...")
#tokenizes entire training and validation data sets
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True, remove_columns=["text"])
#assigns the result of map() to tokenized datasets
#map applies the tokenize funciton in batches
#removes "text" column from tokenized datasets
print("✅ Datasets tokenized.")
print(tokenized_datasets)


Please log in to Hugging Face using an Access Token with 'read' permission.


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

✅ Login process initiated.

⏳ Loading tokenizer for 'meta-llama/Meta-Llama-3.1-8B-Instruct'...
❌ Error loading tokenizer: We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.


OSError: We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

In [4]:
# Load Llama Model


print("⚙️ Defining 4-bit quantization config (BitsAndBytesConfig)...")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, #important setting that enables quantization
    bnb_4bit_quant_type="nf4", #type of quanitzation to use
    bnb_4bit_compute_dtype=compute_dtype, # Determined in Cell 3 based on GPU
    bnb_4bit_use_double_quant=True, #quantizes already quantized data for memory savings
)
print("✅ Quantization config defined.")

# --- Load Base Model with Quantization ---
print(f"⏳ Loading base model '{MODEL_NAME}' for Sequence Classification with 4-bit quantization...")
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, #name of the model
    quantization_config=bnb_config, #passed config from earlier to tell the model to load with 4-bit quantization
    device_map="auto", # Use "auto" for standard HF, should work better on A100
    # device_map = {"": 0}, # Use explicit mapping if "auto" causes issues
    num_labels=NUM_LABELS, #number of output lables, 10 for this case
    id2label=id2label, #dictionary mapping 0-9 to "1"-"10"
    label2id=label2id, #dicionary map the other way
    # ignore_mismatched_sizes=True # Try uncommenting if size mismatch error occurs
)
print("✅ Base model loaded with quantization.")

# Set pad token ID in model config if tokenizer has one (important!)
if tokenizer.pad_token_id is not None: #checks for a padding token
    model.config.pad_token_id = tokenizer.pad_token_id #if there is a padding token, this sets it in the config
    print(f"Model pad_token_id set to: {model.config.pad_token_id}")

# --- Prepare Model for K-bit Training & Apply LoRA using PEFT ---
print("⚙️ Preparing model for K-bit training and defining LoRA config (PEFT)...")
model.gradient_checkpointing_enable() # Often needed for K-bit training, trades memory for computation
model = prepare_model_for_kbit_training(model) #further prepares model

lora_config = LoraConfig(
    r=LORA_R, #sets lora rank to previously defined
    lora_alpha=LORA_ALPHA, #scaling factor, previously defined
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", # Standard Llama 3 targets
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=LORA_DROPOUT, #sets dropout probability to previously defined
    bias="none", #number of bias terms in LoRA matricies
    task_type=TaskType.SEQ_CLS, # Specify Sequence Classification task
)
print("✅ LoRA configuration defined.")

print("⚡️ Applying LoRA adapter to the model using PEFT...")
model = get_peft_model(model, lora_config) # Standard PEFT function, returns new model with LoRA adapters
print("✅ LoRA adapter applied.")
model.print_trainable_parameters() #prints summary of the model's paramaters

# --- Data Collator ---
data_collator = DataCollatorWithPadding(tokenizer=tokenizer) #pads things to maximum legnth
print("✅ Data collator created.")

⚙️ Defining 4-bit quantization config (BitsAndBytesConfig)...
✅ Quantization config defined.
⏳ Loading base model 'meta-llama/Llama-3.2-1B' for Sequence Classification with 4-bit quantization...


model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Base model loaded with quantization.
Model pad_token_id set to: 128001
⚙️ Preparing model for K-bit training and defining LoRA config (PEFT)...
✅ LoRA configuration defined.
⚡️ Applying LoRA adapter to the model using PEFT...
✅ LoRA adapter applied.
trainable params: 11,292,672 || all params: 1,247,127,552 || trainable%: 0.9055
✅ Data collator created.


In [None]:
# Training

import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import TrainingArguments, Trainer # Ensure these are imported
import torch # Ensure torch is imported

# --- Define Compute Metrics Function ---
# Keep this function as it's needed for manual evaluation later
def compute_metrics(eval_pred): #function that takes in a tuple with arguments of the prediction and true label
    predictions, labels = eval_pred; preds = np.argmax(predictions, axis=1) #unpacks tuple, and calculates predicted class index
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted', zero_division=0) #computers precision, recall, and f1 scores
    acc = accuracy_score(labels, preds); return {'accuracy': acc, 'f1': f1, 'precision': precision, 'recall': recall}
    #compares accuracy by comparing true labels and prediction

# --- Define Training Arguments (Workaround Applied) ---
print("⚙️ Setting Training Arguments (evaluation_strategy workaround)...")

# Check GPU capability for fp16/bf16 (should be done in Cell 3, but check again is ok)
bf16_supported = False
fp16_enabled = False
if torch.cuda.is_available():
    if torch.cuda.get_device_capability()[0] >= 8: # Ampere+ (A100, etc.)
        bf16_supported = True
        print("   Setting bf16=True for Ampere+ GPU.")
    else: # T4, V100, etc.
        fp16_enabled = True
        print("   Setting fp16=True for non-Ampere GPU.")

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR + "_chkpts", # Use OUTPUT_DIR from Cell 3 config
    # --- Training Duration & Batching (Use config from Cell 3) ---
    num_train_epochs = NUM_EPOCHS,
    # max_steps = MAX_STEPS, # Alternatively use max_steps
    per_device_train_batch_size=BATCH_SIZE_PER_DEVICE, #from cell 3
    gradient_accumulation_steps=GRAD_ACCUMULATION_STEPS, #from cell 3
    learning_rate=LEARNING_RATE, #from cell 3

    # --- Optimizer & Precision ---
    optim="paged_adamw_8bit", # Recommended 8-bit optimizer for QLoRA
    fp16=fp16_enabled,        # Enable based on GPU check
    bf16=bf16_supported,      # Enable based on GPU check

    # --- Logging & Saving ---
    logging_strategy="steps", #determines when the log info
    logging_steps=LOGGING_STEPS, #from cell 3
    save_strategy=SAVE_STRATEGY,     # e.g., "epoch" or "steps"
    # save_steps = SAVE_STEPS,      # Use if save_strategy="steps"
    save_total_limit=1,          # Optional: keep only last/best checkpoint

    # --- WORKAROUND APPLIED ---
    # evaluation_strategy="epoch", # <<< COMMENTED OUT / REMOVED
    # load_best_model_at_end=True, # <<< MUST be False if not evaluating during training
    # metric_for_best_model="f1",  # <<< Comment out / remove
    load_best_model_at_end=False,  # Explicitly set to False

    # --- Other Args ---
    seed=42, #sets the seed for repoducibility
    report_to="none",
    remove_unused_columns=True, # Safe if Cell 5 removed 'text' column
    gradient_checkpointing=True, # Recommended for standard QLoRA memory saving
    gradient_checkpointing_kwargs={'use_reentrant':False},
)

# --- Create Trainer ---
print("⚙️ Creating Trainer...")
# Ensure model, tokenized_datasets, tokenizer, data_collator exist from previous cells
try:
    #creates trainer with all of the configs from earlier
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"], # Keep for manual eval
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics, # Keep for manual eval
    )
    print("✅ Trainer created.")
except NameError as ne:
    print(f"❌ NameError: A required object (model, dataset, etc.) not found: {ne}")
    print("   Please ensure Cells 3, 4, 5, 6 ran successfully.")
    raise
except Exception as e:
    print(f"❌ Unexpected error creating Trainer: {e}")
    raise

# --- Start Training ---
print(f"\n🚀🚀🚀 Starting Standard QLoRA Fine-tuning! 🚀🚀🚀")
try:
    train_result = trainer.train() # Train the model
    print("\n✅✅✅ Training finished! ✅✅✅")
except Exception as e:
    print(f"❌ An error occurred during trainer.train(): {e}")
    raise

# --- !! Manually Evaluate Model AFTER Training !! ---
print("\n🧪 Evaluating model after training has completed...")
try:
    eval_results = trainer.evaluate(eval_dataset=tokenized_datasets["validation"]) #evaluates model based on validation dataset
    print("\n📊 Final Validation Set Evaluation Results (Manual Trigger):")
    print(eval_results)
    trainer.log_metrics("eval_manual", eval_results)
except Exception as e:
    print(f"❌ Error during manual evaluation: {e}")

# --- Save Final Model State ---
# Note: Saves the model state at the END of training.
print(f"\n💾 Saving final trained model adapter & tokenizer to '{OUTPUT_DIR}'...")
try:
    trainer.save_model(OUTPUT_DIR)
    if 'tokenizer' in locals() and tokenizer is not None: # Save tokenizer if available
         tokenizer.save_pretrained(OUTPUT_DIR)
    print(f"✅ Final model adapter and tokenizer saved to '{OUTPUT_DIR}'.")
except Exception as e:
     print(f"❌ Error saving model/tokenizer: {e}")

# --- Optional: Clean up GPU memory ---
import gc
# Add del statements for objects no longer needed
# Example: del model, trainer, tokenized_datasets, raw_datasets
gc.collect()
if torch.cuda.is_available(): torch.cuda.empty_cache()
print("\n🧹 Training cell GPU memory cache potentially cleared.")

No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


⚙️ Setting Training Arguments (evaluation_strategy workaround)...
   Setting fp16=True for non-Ampere GPU.
⚙️ Creating Trainer...
✅ Trainer created.

🚀🚀🚀 Starting Standard QLoRA Fine-tuning! 🚀🚀🚀


Step,Training Loss
10,1.7041
20,1.7258
30,1.744
40,1.6718
50,1.6491
60,1.7754
70,1.8045
80,1.7664
90,1.7813
100,1.433


Step,Training Loss
10,1.7041
20,1.7258
30,1.744
40,1.6718
50,1.6491
60,1.7754
70,1.8045
80,1.7664
90,1.7813
100,1.433


In [None]:
# --- Optional Download Code Block (Run in new cell after Cell 7) ---
import shutil
from google.colab import files
import os
import time

folder_to_download = "fmea_severity_classifier_llama31_8b_standard_qlora" # used later
timestamp = time.strftime("%Y%m%d-%H%M%S")
zip_filename = f"{folder_to_download}_{timestamp}.zip"

print(f"\n📦 Preparing folder '{folder_to_download}' for download...")
try:
    if os.path.exists(folder_to_download):
        print(f"   Zipping folder to '{zip_filename}'...")
        shutil.make_archive(folder_to_download, 'zip', folder_to_download)
        print(f"   Zipping complete.")
        print(f"⬇️ Triggering browser download for '{zip_filename}'...")
        files.download(zip_filename) # Trigger download
        print(f"✅ Download initiated. Check your browser.")
    else:
        print(f"❌ Error: Output directory '{folder_to_download}' not found. Cannot download.")
except Exception as e:
    print(f"❌ An error occurred during zipping or downloading: {e}")
# --- End Download Code Block ---

In [None]:
# Evaluate on Validation Set
# Reloads model if needed and prints report


try:
    trainer # Check if trainer from Cell 7 exists
    trainer_to_use = trainer
    # Ensure dataset and mappings are accessible
    dataset_to_eval = tokenized_datasets["validation"]
    id2label_eval = id2label
    NUM_LABELS_EVAL = NUM_LABELS
    print("Using existing trainer object for prediction.")
except NameError:
    print("Trainer object not found. Loading model from disk (Standard QLoRA)...")
    from transformers import AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig, Trainer, TrainingArguments
    from peft import PeftModel
    import torch

    ADAPTER_PATH_EVAL = OUTPUT_DIR # Use OUTPUT_DIR from Cell 3
    MODEL_NAME_EVAL = MODEL_NAME # Use MODEL_NAME from Cell 3
    # Reload tokenizer
    tokenizer_eval = AutoTokenizer.from_pretrained(ADAPTER_PATH_EVAL) #loads from saved files
    if tokenizer_eval.pad_token is None: tokenizer_eval.pad_token = tokenizer_eval.eos_token #same setup as training
    # Reload base model with quantization config
    compute_dtype_eval = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
    bnb_config_eval = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=compute_dtype_eval, bnb_4bit_use_double_quant=True)
    base_model_eval = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME_EVAL, quantization_config=bnb_config_eval, device_map="auto",
        num_labels=NUM_LABELS, id2label=id2label, label2id=label2id
    )
    if base_model_eval.config.pad_token_id is None: base_model_eval.config.pad_token_id = tokenizer_eval.pad_token_id
    #sets padding token id if not already set

    # Load adapter
    model_eval = PeftModel.from_pretrained(base_model_eval, ADAPTER_PATH_EVAL)
    model_eval.eval()
    print("Model reloaded from disk.")
    # Create dummy trainer for .predict()
    dummy_args = TrainingArguments(output_dir="./eval_temp_std", report_to="none", device=model_eval.device)
    eval_trainer = Trainer(model=model_eval, args=dummy_args, tokenizer=tokenizer_eval)
    trainer_to_use = eval_trainer
    # Need to re-run tokenization if 'tokenized_datasets' not available
    # Assuming it's available or re-run Cell 5
    dataset_to_eval = tokenized_datasets["validation"]
    id2label_eval = id2label; NUM_LABELS_EVAL = NUM_LABELS

# Get predictions
predictions_output = trainer_to_use.predict(dataset_to_eval) #pass in eval dataset and runs the model
y_true = predictions_output.label_ids #holds the true labels
y_pred = np.argmax(predictions_output.predictions, axis=1) #calculates predicted label based on raw outputs

# Generate report using label names ("1" to "10")
# Define the full range of expected label indices (0 to 9)
expected_labels = list(range(NUM_LABELS_EVAL)) # Should be [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# Ensure target_names correspond to these expected labels
target_names = [id2label_eval[i] for i in expected_labels]

# Call classification_report with the 'labels' parameter specified
report = classification_report(
    y_true,
    y_pred,
    labels=expected_labels, # <<< Tell function to report these labels
    target_names=target_names,
    digits=4,
    zero_division=0
)



In [None]:
# ───────────────────────────────────────────────────────────
# Cell 8A: Run Prediction & Inspect Results
# ───────────────────────────────────────────────────────────
import numpy as np
import pandas as pd # Needed for unique check potentially
# Make sure necessary libraries/objects from previous cells are loaded
# (Trainer, tokenized_datasets, id2label, NUM_LABELS, etc.)
print("\n📋 Preparing for detailed classification report...")
# --- Logic to find or reload trainer and data ---
try:
    trainer # Check if trainer from Cell 7 exists
    # Ensure needed variables are accessible
    if 'trainer_to_use' not in locals(): trainer_to_use = trainer
    if 'dataset_to_eval' not in locals(): dataset_to_eval = tokenized_datasets["validation"]
    if 'id2label_eval' not in locals(): id2label_eval = id2label
    if 'NUM_LABELS_EVAL' not in locals(): NUM_LABELS_EVAL = NUM_LABELS
    print("Using existing trainer object and data for prediction.")
except NameError:
    print("Trainer object or other necessary variables not found. Attempting to reload model...")
    # Include the reloading logic from your original Cell 8 here if needed
    # Make sure ADAPTER_PATH_EVAL, MODEL_NAME_EVAL etc. are defined correctly based on Cell 3/7
    # For simplicity, assuming Cell 7 objects still exist. Add reloading if required.
    print("Error: Cannot proceed without trainer object or reloaded model. Please ensure Cell 7 ran or add reloading code.")
    raise NameError("Trainer not found and reloading logic missing/failed.")
except Exception as e:
    print(f"Error setting up for prediction: {e}")
    raise
# --- End finding trainer/data ---

# --- Get Predictions ---
print(f"\n⏳ Running prediction on validation set ({len(dataset_to_eval)} samples)...")
try:
    predictions_output = trainer_to_use.predict(dataset_to_eval) #passes the model in, and runs forward pass of model without back prop
    print("✅ trainer.predict() finished successfully!")
except Exception as e:
    print(f"❌ Error during trainer.predict(): {e}")
    raise # Stop if prediction fails

# --- Inspect Prediction Outputs ---
try:
    y_true = predictions_output.label_ids #true labels
    y_pred = np.argmax(predictions_output.predictions, axis=1) #predicted labels by taking raw outputs
    print("\n--- Prediction Output Inspection ---")
    print(f"Shape of y_true (true labels): {y_true.shape}")
    print(f"Shape of y_pred (predicted labels): {y_pred.shape}")
    print(f"Unique true labels found in validation set: {np.unique(y_true)}")
    print(f"Unique predicted labels by the model: {np.unique(y_pred)}")
    print(f"Data type of y_true: {y_true.dtype}")
    print(f"Data type of y_pred: {y_pred.dtype}")
    print(f"Any NaN in y_true?: {np.isnan(y_true).any()}")
    # y_pred from argmax should not contain NaN unless logits were NaN
    print("------------------------------------")
    print("\n✅ Inspection complete. If shapes look correct and labels are in range [0-9], proceed to Cell 8B.")

    # Make variables available for the next cell (Colab usually does this automatically)
    # If issues arise, you might need to declare them global, but try without first.

except Exception as e:
    print(f"❌ Error during result inspection: {e}")
    raise
# --- End Inspection ---

# NOTE: We stop here and run the report generation in the next cell (Cell 8B)
# %%

In [None]:
#Generate and Print Report

import numpy as np
from sklearn.metrics import classification_report
# Make sure necessary variables exist from previous cell's execution
# (y_true, y_pred, id2label_eval, NUM_LABELS_EVAL)

print("\n⚙️ Preparing to generate classification report...")

try:
    # Check if needed variables exist
    y_true
    y_pred
    id2label_eval
    NUM_LABELS_EVAL

    # Define the full range of expected label indices (0 to 9)
    expected_labels = list(range(NUM_LABELS_EVAL))
    # Ensure target_names correspond to these expected labels
    target_names = [id2label_eval[i] for i in expected_labels]

    print("⏳ Calculating classification report...")
    # Call classification_report with the 'labels' parameter specified
    report = classification_report(
        y_true,
        y_pred,
        labels=expected_labels, # Tell function all expected labels
        target_names=target_names,
        digits=4,
        zero_division=0 # Handle labels with no predictions/support
    )

    print("\n✅ Report calculation finished.")
    print("\n📋 Classification Report:\n")
    print(report) # <<< Print the calculated report

except NameError as ne:
     print(f"❌ NameError: A required variable (y_true, y_pred, etc.) is missing: {ne}")
     print("   Please ensure Cell 8A ran successfully first.")
except Exception as e:
    print(f"❌ Error during classification_report generation or printing: {e}")
    import traceback
    traceback.print_exc() # Print detailed traceback for errors here

In [None]:
# ───────────────────────────────────────────────────────────
# Cell 9: Manual Prediction Function (Standard QLoRA for Llama 3.1)
# ───────────────────────────────────────────────────────────
from transformers import AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig
from peft import PeftModel
import torch
import pandas as pd
import numpy as np
# import re # Not needed for classification output

# --- Configuration ---
MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct"
ADAPTER_PATH = "fmea_severity_classifier_llama31_8b_standard_qlora" # <<< same path at cell 7
MAX_SEQ_LENGTH = 512
# Define input columns EXACTLY as used in training (Cell 4)
# Assumes these were correctly defined/cleaned before
COL_SUBFUNCTION = "Subfunction"; COL_REQUIREMENTS = "Requirements"; COL_FAILURE_MODE = "Potential Failure Mode and descriptions"
COL_EFFECT_PRIMARY = "Potential Effect(s) of Failure (primary)"; #COL_EFFECT_SECONDARY = "Potential Effect(s) of Failure (secondary)" #cant use right now
INPUT_COLS_MANUAL = [COL_SUBFUNCTION, COL_REQUIREMENTS, COL_FAILURE_MODE, COL_EFFECT_PRIMARY] #, COL_EFFECT_SECONDARY]
NUM_LABELS = 10
id2label = {i: str(i+1) for i in range(NUM_LABELS)}
# --- End Configuration ---

# --- Load Fine-tuned Model and Tokenizer ---
# Ensure this loading logic runs successfully before prediction
print("⏳ Loading fine-tuned Llama 3.2 model for manual prediction (Standard QLoRA)...")
try:
    # Define quantization config again
    compute_dtype_pred = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
    bnb_config_pred = BitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype_pred, bnb_4bit_use_double_quant=True,
    )
    # Load base model with quantization, similar as before
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        quantization_config=bnb_config_pred,
        device_map="auto", # Or {"": 0}
        num_labels=NUM_LABELS,
        id2label=id2label,
        label2id={v: k for k, v in id2label.items()},
        # token = "hf_..." # Add if login via notebook_login() didn't persist
    )
    # Load the tokenizer associated with the saved adapter/base
    tokenizer = AutoTokenizer.from_pretrained(ADAPTER_PATH) # Load from adapter path
    if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token
    if model.config.pad_token_id is None: model.config.pad_token_id = tokenizer.pad_token_id

    # Load the LoRA adapter onto the base model
    print(f"   Applying LoRA adapter from {ADAPTER_PATH}...")
    model = PeftModel.from_pretrained(model, ADAPTER_PATH)
    model.eval()
    device = model.device
    print(f"✅ Model and tokenizer loaded on device: {device}")

except Exception as e: print(f"❌ Error loading model/adapter: {e}"); raise
# --- End Model Loading ---

# --- Define Prediction Function ---
def predict_fmea_severity_final(**kwargs):
    """ Takes keyword arguments for FMEA input features and predicts Severity (1-10). """
    # Build the input text string
    text_parts = []; missing_args = []
    for col in INPUT_COLS_MANUAL:
        value = kwargs.get(col); value = str(value) if pd.notna(value) else ""
        clean_col_name = col.split('(')[0].strip(); text_parts.append(f"{clean_col_name}: {value}")
    combined_text = "\n".join(text_parts)
    print(f"--- Input Text for Model ---\n{combined_text}\n--------------------------")

    # Tokenize
    inputs = tokenizer([combined_text], return_tensors="pt", truncation=True, padding=True, max_length=MAX_SEQ_LENGTH).to(device)

    # Predict
    print("⏳ Predicting severity...")
    with torch.no_grad(): outputs = model(**inputs); logits = outputs.logits
    predicted_class_id = torch.argmax(logits, dim=-1).item()
    predicted_severity = id2label.get(predicted_class_id, "Unknown") # Use mapping

    print(f"✅ Predicted Severity (1-10): {predicted_severity}")
    return predicted_severity

# --- Example Usage (Using User Provided Scenarios)
print("\n--- Manual Prediction Examples (User Provided) ---")

# Example 1: Emergency Maneuvers
print("--- Predicting User Example 1 ---")
pred_user_1 = predict_fmea_severity_final(
    # Use **{} for keys with spaces/symbols, ensure keys match cleaned column names
    **{COL_SUBFUNCTION: "Emergency Maneuvers",
       COL_REQUIREMENTS: "Manage safe operations by reacting to sudden braking or lane changes by other vehicles or objects",
       COL_FAILURE_MODE: "No Function (The autonomous truck fails to detect or react appropriately [brake, steer] to sudden braking, lane changes by other vehicles, or objects appearing in the path, thereby failing to manage safe operations during emergency scenarios.)",
       COL_EFFECT_PRIMARY: "AV fails to apply required emergency braking",
       COL_EFFECT_SECONDARY: "results in traffic citation"}
)
print(f"Predicted Severity for User Example 1: {pred_user_1}\n")

# Example 2: Move For Disabled/Stopped Vehicles
print("--- Predicting User Example 2 ---")
pred_user_2 = predict_fmea_severity_final(
    **{COL_SUBFUNCTION: "Move For Disabled/Stopped Vehicles",
       COL_REQUIREMENTS: "Manage safe operations by operating appropriately to disabled or emergency vehicles that are stationary or stopped on the road or on the shoulder.",
       COL_FAILURE_MODE: "No Function (The autonomous truck fails to detect a stationary disabled/emergency vehicle or fails to execute required safe operations like reducing speed, changing lanes [moving over], or providing adequate lateral clearance, thereby failing to manage safe operations.)",
       COL_EFFECT_PRIMARY: "AV fails to reduce speed when approaching stationary vehicle/personnel",
       COL_EFFECT_SECONDARY: "results in traffic citation"}
)
print(f"Predicted Severity for User Example 2: {pred_user_2}\n")

# --- End Example Usage ---