In [None]:
import torch

if torch.cuda.is_available():
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("No GPU detected, using CPU.")

Using GPU: NVIDIA A100-SXM4-40GB


# Mistral LLM

In [None]:
# To clear output
from IPython.display import clear_output
clear_output()

In [None]:
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers trl peft accelerate bitsandbytes

clear_output()

In [None]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
from google.colab import drive

drive.mount('/content/drive', force_remount = True)

Mounted at /content/drive


In [None]:
data = pd.read_csv("/content/drive/My Drive/DSC 514 NLP/sampled_data.csv")

In [None]:
from unsloth import FastLanguageModel
import torch
# Set the maximum tokens that the model can handle
max_seq_length = 4050
dtype = None
# Use 4bit quantization to reduce memory usage
load_in_4bit = True


# From FastLanguageModel return the model and the tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = None,
    load_in_4bit = True,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.19: Fast Mistral patching. Transformers: 4.50.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/155 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

In [None]:
# Prepare the model for fine tuning - update only some of the model's parameters
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

Unsloth 2025.3.19 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


# Data Preparation

In [None]:
# Split our dataframe into train and test dataframes
train_df_original, test_df_original = train_test_split(data, test_size=0.2, random_state=42)
train_df_original.head()

Unnamed: 0,movie_id,plot_summary,duration,genre,rating_x,release_date,plot_synopsis,review_date,user_id,is_spoiler,review_text,rating_y,review_summary,whole_review
3941,tt1074638,When Bond's latest assignment goes gravely wro...,2h 23min,"['Action', 'Adventure', 'Thriller']",7.8,2012-11-09,"The film begins in Istanbul, where James Bond ...",22 November 2012,ur37965905,True,Mendes so meticulously crafts scenes to explor...,3,Batman NOT Bond,Mendes so meticulously crafts scenes to explor...
1897,tt0134847,"The space transport vessel ""Hunter-Gratzner"" c...",1h 49min,"['Horror', 'Sci-Fi']",7.1,2000-02-18,In the distant future in another part of the g...,31 July 2001,ur0762705,True,(Might contain minor spoilers.)Watched this on...,6,An OK Sci-Fi Horror Flick (if you're into that...,(Might contain minor spoilers.)Watched this on...
2229,tt0212346,Undercover FBI agent Gracie Hart shows no sign...,1h 49min,"['Action', 'Comedy', 'Crime']",6.2,2000-12-22,The film opens at a school where a boy is pick...,2 July 2009,ur0819382,True,"I guess ""unpretentious"" is as good a way as an...",6,Modest comedy.,"I guess ""unpretentious"" is as good a way as an..."
4299,tt1389072,"""Downsizing"" follows a kindly occupational the...",2h 15min,"['Comedy', 'Drama', 'Sci-Fi']",5.8,2017-12-22,The film opens at a research facility in Berge...,22 December 2017,ur69980801,True,"If the environment... or rather, The Environme...",1,Not What It Promises to Be as an Obvious Agend...,"If the environment... or rather, The Environme..."
2868,tt0364725,White Goodman (Ben Stiller) is the owner and f...,1h 32min,"['Comedy', 'Sport']",6.7,2004-06-18,The movie opens with an infomercial for Globo ...,22 April 2014,ur6918917,True,"Vince Vaughn, Ben Stiller, Christine Taylor, J...",6,"""Good Comedy!""","Vince Vaughn, Ben Stiller, Christine Taylor, J..."


In [None]:
# Create copy of training and test data
train_df = train_df_original.copy()
test_df = test_df_original.copy()

# Classify as spoiler/non-spoiler based only on the review

We will use the Alpaca-style prompt for our classification problem.

In [None]:
# Define prompt template
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# Define a function that adds instruction, input, and output columns to our dataset
def add_instruction(df):
    df["instruction"] = "Classify the following movie review as either 'spoiler' or 'non-spoiler'."
    df["input"] = df["whole_review"]
    df["output"] = df["is_spoiler"].apply(lambda x: "spoiler" if x else "non-spoiler")
    return df

# Add instruction, input and output columns to training and test dataframes
train_df = add_instruction(train_df)
#test_df = add_instruction(test_df)

# Convert training and test dataframes to HuggingFace Datasets
from datasets import Dataset
train_dataset = Dataset.from_pandas(train_df)
#test_dataset = Dataset.from_pandas(test_df)

EOS_TOKEN = tokenizer.eos_token
# Function that converts dataset to instruction-style using the alpaca_prompt
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):

        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

# Convert training and test dataset to instruction-style
train_dataset = train_dataset.map(formatting_prompts_func, batched = True,)
#test_dataset = test_dataset.map(formatting_prompts_func, batched = True,)

Map:   0%|          | 0/4268 [00:00<?, ? examples/s]

In [None]:
# Just to view the extra columns that we have added
train_dataset.to_pandas().head()

Unnamed: 0,movie_id,plot_summary,duration,genre,rating_x,release_date,plot_synopsis,review_date,user_id,is_spoiler,review_text,rating_y,review_summary,whole_review,instruction,input,output,__index_level_0__,text
0,tt1074638,When Bond's latest assignment goes gravely wro...,2h 23min,"['Action', 'Adventure', 'Thriller']",7.8,2012-11-09,"The film begins in Istanbul, where James Bond ...",22 November 2012,ur37965905,True,Mendes so meticulously crafts scenes to explor...,3,Batman NOT Bond,Mendes so meticulously crafts scenes to explor...,Classify the following movie review as either ...,Mendes so meticulously crafts scenes to explor...,spoiler,3941,"Below is an instruction that describes a task,..."
1,tt0134847,"The space transport vessel ""Hunter-Gratzner"" c...",1h 49min,"['Horror', 'Sci-Fi']",7.1,2000-02-18,In the distant future in another part of the g...,31 July 2001,ur0762705,True,(Might contain minor spoilers.)Watched this on...,6,An OK Sci-Fi Horror Flick (if you're into that...,(Might contain minor spoilers.)Watched this on...,Classify the following movie review as either ...,(Might contain minor spoilers.)Watched this on...,spoiler,1897,"Below is an instruction that describes a task,..."
2,tt0212346,Undercover FBI agent Gracie Hart shows no sign...,1h 49min,"['Action', 'Comedy', 'Crime']",6.2,2000-12-22,The film opens at a school where a boy is pick...,2 July 2009,ur0819382,True,"I guess ""unpretentious"" is as good a way as an...",6,Modest comedy.,"I guess ""unpretentious"" is as good a way as an...",Classify the following movie review as either ...,"I guess ""unpretentious"" is as good a way as an...",spoiler,2229,"Below is an instruction that describes a task,..."
3,tt1389072,"""Downsizing"" follows a kindly occupational the...",2h 15min,"['Comedy', 'Drama', 'Sci-Fi']",5.8,2017-12-22,The film opens at a research facility in Berge...,22 December 2017,ur69980801,True,"If the environment... or rather, The Environme...",1,Not What It Promises to Be as an Obvious Agend...,"If the environment... or rather, The Environme...",Classify the following movie review as either ...,"If the environment... or rather, The Environme...",spoiler,4299,"Below is an instruction that describes a task,..."
4,tt0364725,White Goodman (Ben Stiller) is the owner and f...,1h 32min,"['Comedy', 'Sport']",6.7,2004-06-18,The movie opens with an infomercial for Globo ...,22 April 2014,ur6918917,True,"Vince Vaughn, Ben Stiller, Christine Taylor, J...",6,"""Good Comedy!""","Vince Vaughn, Ben Stiller, Christine Taylor, J...",Classify the following movie review as either ...,"Vince Vaughn, Ben Stiller, Christine Taylor, J...",spoiler,2868,"Below is an instruction that describes a task,..."


In [None]:
# View text for the first review
print(train_dataset[0]["text"])

Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Classify the following movie review as either 'spoiler' or 'non-spoiler'.

### Input:
Mendes so meticulously crafts scenes to explore story themes that he completely misses the spirit of Bond, kills the character, and delivers a soulless movie.THIS WILL BE FULL OF SPOILERS If there's one scene that encapsulates this entire movie, it's the scene where M is reciting pretentious poetry interspersed with action scenes involving Bond. The inter-cut action scenes are boring but that doesn't matter. In Mendes hands, it's the pretensions that are important. The action scenes are only there to add symbolic weight to the words of the poem -- or to be strung together as pieces of "art" and paraded as a Bond movie. Ironically, there's no scene of classy performance art like we normally get in a Bond movie (we DO get it in th

# Train the Model

We will use Huggingface TRL's `SFTTrainer`. We do 60 steps to speed things up, but we can set `num_train_epochs=1` for a full run, and turn off `max_steps=None`.

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        num_train_epochs = 1,
        max_steps = -1,
        #max_steps = 60, # Set num_train_epochs = 1 for full training runs
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/4268 [00:00<?, ? examples/s]

In [None]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA A100-SXM4-40GB. Max memory = 39.557 GB.
4.035 GB of memory reserved.


In [None]:
# To disable all wandb logging and avoid API prompts
import os
os.environ["WANDB_MODE"] = "disabled"

# Train the model (fine-tune) and save training statistics into trainer_stats
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 4,268 | Num Epochs = 1 | Total steps = 533
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040/7,000,000,000 (0.60% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,2.1951
2,2.3526
3,2.4317
4,2.3005
5,2.2829
6,2.1545
7,2.1188
8,1.8996
9,2.146
10,2.0693


In [None]:
#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

1133.7164 seconds used for training.
18.9 minutes used for training.
Peak reserved memory = 5.936 GB.
Peak reserved memory for training = 1.901 GB.
Peak reserved memory % of max memory = 15.006 %.
Peak reserved memory for training % of max memory = 4.806 %.


# Inference

Now we will run the model using two example reviews — one containing a spoiler and one without — to test whether it classifies them correctly.

In [None]:
# Enable fast inference with Unsloth
FastLanguageModel.for_inference(model)


# Example reviews to classify
example_review_spoiler = "The main character dies at the end of the movie."
example_review_non_spoiler = "Tom Holland is the perfect actor to play Spiderman"

# Format prompt with instruction + review as input + empty output for generation
formatted_prompt_spoiler = alpaca_prompt.format(
    "Classify the following movie review as either 'spoiler' or 'non-spoiler'.", #instruction
    example_review_spoiler, #input
    ""  # Leave output empty for the model to generate it
)
formatted_prompt_non_spoiler = alpaca_prompt.format(
    "Classify the following movie review as spoiler or non-spoiler.", #instruction
    example_review_non_spoiler, #input
    ""  # Leave output empty for the model to generate it
)


# Tokenize and move to CUDA
inputs_spoiler = tokenizer([formatted_prompt_spoiler], return_tensors="pt").to("cuda")
inputs_non_spoiler = tokenizer([formatted_prompt_non_spoiler], return_tensors="pt").to("cuda")

# Generate the predictions
outputs_spoiler = model.generate(
    **inputs_spoiler,
    max_new_tokens=64,
    use_cache=True
)
outputs_non_spoiler = model.generate(
    **inputs_non_spoiler,
    max_new_tokens=64,
    use_cache=True
)

In [None]:
# Decode the predictions
print(tokenizer.batch_decode(outputs_spoiler)[0])
print("#-----------------------------------------#")
print(tokenizer.batch_decode(outputs_non_spoiler)[0])

<s> Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Classify the following movie review as either 'spoiler' or 'non-spoiler'.

### Input:
The main character dies at the end of the movie.

### Response:
non-spoiler</s>
#-----------------------------------------#
<s> Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Classify the following movie review as spoiler or non-spoiler.

### Input:
Tom Holland is the perfect actor to play Spiderman

### Response:
non-spoiler</s>


We observe that both reviews are classified as non-spoiler. Maybe this is due to the training steps being low. Later we will increase them and check its performance.

# Save the model locally

In [None]:
model.save_pretrained("spoiler_classification_LLM")
tokenizer.save_pretrained("spoiler_classification_LLM")

('spoiler_classification_LLM/tokenizer_config.json',
 'spoiler_classification_LLM/special_tokens_map.json',
 'spoiler_classification_LLM/tokenizer.model',
 'spoiler_classification_LLM/added_tokens.json',
 'spoiler_classification_LLM/tokenizer.json')

# Test on unseen data

The following code is used to evaluate our LLM on test data. To evaluate our LLM on the test data, we could load the previously saved model and tokenizer in the main notebook and then apply the evaluation code below.

In [None]:
# Instruction string (same as in training)
instruction_text = "Classify the following movie review as either 'spoiler' or 'non-spoiler'."

# Function to classify a single review
def classify_review(review_text):
    formatted_prompt = alpaca_prompt.format(instruction_text, review_text, "")
    inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    # Extract only the predicted label from the generated response
    if "spoiler" in decoded.lower():
        if "non-spoiler" in decoded.lower() and decoded.lower().index("non-spoiler") < decoded.lower().index("spoiler"):
            return "non-spoiler"
        return "spoiler"
    return "non-spoiler"


# Apply classification
predictions = []
for review in test_df["whole_review"]:
    pred = classify_review(review)
    predictions.append(pred)

# Save results the dataframe
test_df["predicted_label"] = predictions
test_df["true_label"] = test_df["is_spoiler"].apply(lambda x: "spoiler" if x else "non-spoiler")

# View results
test_df[["whole_review", "true_label", "predicted_label"]].head()

Unnamed: 0,whole_review,true_label,predicted_label
1158,"Batman Forever. Personally, i don't like this ...",spoiler,spoiler
4838,"Ever since ""Rise of the Planet of the apes"" wa...",non-spoiler,spoiler
3088,"The Lives of Others,Das Leben der Anderen in G...",spoiler,spoiler
803,Two sisters (Davis & Petty) help make the firs...,non-spoiler,spoiler
168,"I love North by Northwest, Birds and Psycho.Bu...",spoiler,spoiler


In [None]:
from sklearn.metrics import classification_report
print(classification_report(test_df["true_label"], test_df["predicted_label"], labels=["spoiler", "non-spoiler"]))

              precision    recall  f1-score   support

     spoiler       0.49      1.00      0.66       520
 non-spoiler       0.00      0.00      0.00       547

    accuracy                           0.49      1067
   macro avg       0.24      0.50      0.33      1067
weighted avg       0.24      0.49      0.32      1067



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


We observe that the model achieves a recall of 1.0 when treating "spoiler" as the positive class, and a recall of 0 when treating "non-spoiler" as positive. This indicates that the model classified all instances as spoilers. A likely cause is the low number of training steps, which we plan to address.

# Classify as spoiler/non-spoiler based on both the review and the plot

Since the movie plots do not contain spoilers, we will train the LLM to classify reviews by incorporating both the review text and the plot information. We will then compare its performance to a baseline model that relies solely on the review text, to evaluate whether including the plot improves spoiler detection.

In [None]:
# Create copy of training and test data (again)
train_df = train_df_original.copy()
test_df = test_df_original.copy()

In [None]:
# From FastLanguageModel return the model and the tokenizer
model_wc, tokenizer_wc = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = None,
    load_in_4bit = True,
)

==((====))==  Unsloth 2025.3.19: Fast Mistral patching. Transformers: 4.50.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
# Prepare the model for fine tuning - update only some of the model's parameters
model_wc = FastLanguageModel.get_peft_model(
    model_wc,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

In [None]:
# Define prompt template
alpaca_prompt_with_context = """You are a spoiler detection assistant. Use the provided plot summary and plot synopsis (which contain no spoilers) to help judge whether the review contains a spoiler.

### Instruction:
Classify the following movie review as either 'spoiler' or 'non-spoiler'. You may use the plot summary and plot synopsis as context.

### Input:
Review: {}
Plot Summary: {}
Plot Synopsis: {}

### Response:
{}"""

In [None]:
# Define a function that adds instruction, input, and output columns to our dataset
def add_instruction_with_context(df):
    df["instruction"] = "Classify the following movie review as either 'spoiler' or 'non-spoiler'. You may use the plot summary and plot synopsis as context."
    df["input_summary"] = df["plot_summary"].fillna("")
    df["input_synopsis"] = df["plot_synopsis"].fillna("")
    df["input_review"] = df["whole_review"]
    df["output"] = df["is_spoiler"].apply(lambda x: "spoiler" if x else "non-spoiler")
    return df

In [None]:
# Add instruction, input and output columns to training and test dataframes
train_df = add_instruction_with_context(train_df)

# Convert training dataframe to HuggingFace Dataset
train_dataset = Dataset.from_pandas(train_df)

In [None]:
# Function that converts dataset to instruction-style using the alpaca_prompt
def formatting_prompts_func_with_context(examples):
    summaries = examples["input_summary"]
    synopses  = examples["input_synopsis"]
    reviews   = examples["input_review"]
    outputs   = examples["output"]
    instructions = examples["instruction"]

    texts = []
    for instruction, summary, synopsis, review, output in zip(instructions, summaries, synopses, reviews, outputs):
        text = alpaca_prompt.format(summary, synopsis, review, output) + EOS_TOKEN
        texts.append(text)
    return { "text": texts }

In [None]:
# Convert training dataset to instruction-style
train_dataset = train_dataset.map(formatting_prompts_func_with_context, batched = True,)

Map:   0%|          | 0/4268 [00:00<?, ? examples/s]

# Train the model

In [None]:
trainer_wc = SFTTrainer(
    model = model_wc,
    tokenizer = tokenizer_wc,
    train_dataset = train_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        num_train_epochs = 1,
        max_steps = -1,
        #max_steps = 60, # Set num_train_epochs = 1 for full training runs
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/4268 [00:00<?, ? examples/s]

In [None]:
# Train the model (fine-tune) and save training statistics into trainer_stats
trainer_stats_wc = trainer_wc.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 4,268 | Num Epochs = 1 | Total steps = 533
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040/7,000,000,000 (0.60% trained)


Step,Training Loss
1,2.0655
2,2.1344
3,2.0598
4,2.0649
5,2.0024
6,2.0056
7,1.9817
8,1.8898
9,1.9764
10,1.877


# Save the model locally

In [None]:
model_wc.save_pretrained("spoiler_classification_with_context_LLM")
tokenizer_wc.save_pretrained("spoiler_classification_with_context_LLM")

('spoiler_classification_with_context_LLM/tokenizer_config.json',
 'spoiler_classification_with_context_LLM/special_tokens_map.json',
 'spoiler_classification_with_context_LLM/tokenizer.model',
 'spoiler_classification_with_context_LLM/added_tokens.json',
 'spoiler_classification_with_context_LLM/tokenizer.json')

# Test on unseen data

In [None]:
# Enable fast inference with Unsloth
FastLanguageModel.for_inference(model_wc)

# Instruction string (same as in training)
instruction_text = "Classify the following movie review as either 'spoiler' or 'non-spoiler'. You may use the plot summary and plot synopsis as context."

# Function to classify a single review
def classify_review(review_text, summary_text, synopsis_text):
    formatted_prompt = alpaca_prompt_with_context.format(review_text, summary_text, synopsis_text, "")
    inputs = tokenizer_wc([formatted_prompt], max_length = max_seq_length, truncation=True, return_tensors="pt").to("cuda")
    outputs = model_wc.generate(**inputs, max_new_tokens=10, use_cache=True)
    decoded = tokenizer_wc.batch_decode(outputs, skip_special_tokens=True)[0]

    # Clean output
    if "spoiler" in decoded.lower():
        if "non-spoiler" in decoded.lower() and decoded.lower().index("non-spoiler") < decoded.lower().index("spoiler"):
            return "non-spoiler"
        return "spoiler"
    return "non-spoiler"

In [None]:
# Apply classification on test data
predictions = []
for _, row in test_df.iterrows():
    pred = classify_review(row["whole_review"], row["plot_summary"], row["plot_synopsis"])
    predictions.append(pred)

# Save results in the dataframe
test_df["predicted_label"] = predictions
test_df["true_label"] = test_df["is_spoiler"].apply(lambda x: "spoiler" if x else "non-spoiler")

# View results
test_df[["whole_review", "true_label", "predicted_label"]].head()

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# Print classification report
print(classification_report(test_df["true_label"], test_df["predicted_label"], labels=["spoiler", "non-spoiler"]))