In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re


from datasets import load_dataset
from datasets import Dataset, DatasetDict
from transformers import BertConfig, BertModel


from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification

from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import EvalPrediction
import torch
from transformers import TrainingArguments, Trainer


from sklearn.model_selection import train_test_split

import pickle


plt.style.use("dark_background")

pd.set_option("display.max_columns", 2500)
pd.set_option("display.max_rows", 50)
pd.set_option("display.max_colwidth", 500)


plt.style.use("dark_background")

%load_ext lab_black


def text_normalization(string):
    string = string.lower()
    string = re.sub(r"[^a-zA-Z0-9\s]", " ", string)
    string = re.sub(r"\n", " ", string)
    string = re.sub(r"\s+", " ", string)
    string = string.strip()
    return string


df = pd.read_parquet("datasets/amazon_reviews_multi-train.parquet")

df = df[["review_title", "review_body", "product_category"]]

# le = LabelEncoder()

# df["product_class"] = le.fit_transform(df["product_category"])

for col in ["review_title", "review_body"]:
    df[col] = df[col].apply(text_normalization)


df["text"] = df["review_title"] + " " + df["review_body"]

df = df[["text", "product_category"]]

SAMPLE_SIZE = 10000

df_results = df[0:SAMPLE_SIZE]

df = df[0:SAMPLE_SIZE]

2023-09-05 09:17:32.070605: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-09-05 09:17:32.091798: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
encoded_df = pd.get_dummies(
    df["product_category"], columns=["product_category"], prefix="", prefix_sep=""
)
encoded_df = encoded_df.astype(bool)
df = pd.concat([df["text"], encoded_df], axis=1)

# Make into Dataset

In [3]:
encoded_df

Unnamed: 0,apparel,automotive,baby_product,beauty,book,camera,digital_ebook_purchase,digital_video_download,drugstore,electronics,furniture,grocery,home,home_improvement,industrial_supplies,jewelry,kitchen,lawn_and_garden,luggage,musical_instruments,office_product,other,pc,personal_care_appliances,pet_products,shoes,sports,toy,video_games,watch,wireless
0,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
9996,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False
9997,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
9998,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False


In [4]:
train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42)
valid_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Convert the split DataFrames into Datasets
train = Dataset.from_pandas(train_df, split="train")
valid = Dataset.from_pandas(valid_df, split="validation")
test = Dataset.from_pandas(test_df, split="test")

dataset = DatasetDict({"train": train, "validation": valid, "test": test})

In [5]:
labels = [
    label
    for label in dataset["train"].features.keys()
    if label not in ["text", "__index_level_0__"]
]
id2label = {idx: label for idx, label in enumerate(labels)}
label2id = {label: idx for idx, label in enumerate(labels)}

In [6]:
# with open("label_dicts/id2label.pkl", "wb") as f:
#  pickle.dump(id2label, f)

# with open("label_dicts/label2id.pkl", "wb") as f:
#   pickle.dump(label2id, f)

In [6]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

In [7]:
def preprocess_data(examples):
    # take a batch of texts
    text = examples["text"]
    # encode them
    encoding = tokenizer(text, padding="max_length", truncation=True, max_length=256)
    # add labels
    labels_batch = {k: examples[k] for k in examples.keys() if k in labels}
    # create numpy array of shape (batch_size, num_labels)
    labels_matrix = np.zeros((len(text), len(labels)))
    # fill numpy array
    for idx, label in enumerate(labels):
        labels_matrix[:, idx] = labels_batch[label]

    encoding["labels"] = labels_matrix.tolist()

    return encoding

In [8]:
encoded_dataset = dataset.map(
    preprocess_data, batched=True, remove_columns=dataset["train"].column_names
)

Map:   0%|          | 0/7000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

In [9]:
encoded_dataset

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 7000
    })
    validation: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 1500
    })
    test: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 1500
    })
})

In [10]:
example = encoded_dataset["train"][0]

In [11]:
tokenizer.decode(example["input_ids"])

'[CLS] way too quiet i want to love this because skip hop makes amazing products but this white noise machine is way too quiet i can hardly hear anything coming out i got a few white noise machines and this one is by far the quietest the hatch which is not portable is nice and loud i got another travel white noise machine on amazon that looked like a little bunny and it s nice and loud i don t know why this machine is so quiet i regret buying it but that s probably why it was affordable [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD

In [12]:
example["labels"]

[0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]

In [13]:
encoded_dataset.set_format("torch")

In [14]:
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    problem_type="multi_label_classification",
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
batch_size = 8
metric_name = "f1"

In [16]:
args = TrainingArguments(
    f"bert-finetuned-sem_eval-english",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    # push_to_hub=True,
)

In [17]:
def multi_label_metrics(predictions, labels, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))
    # next, use threshold to turn them into integer predictions
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1
    # finally, compute metrics
    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average="micro")
    roc_auc = roc_auc_score(y_true, y_pred, average="micro")
    accuracy = accuracy_score(y_true, y_pred)
    # return as dictionary
    metrics = {"f1": f1_micro_average, "roc_auc": roc_auc, "accuracy": accuracy}
    return metrics


def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    result = multi_label_metrics(predictions=preds, labels=p.label_ids)
    return result

In [18]:
encoded_dataset["train"][0]["labels"].type()

'torch.FloatTensor'

In [19]:
encoded_dataset["train"]["input_ids"][0]

tensor([  101,  2126,  2205,  4251,  1045,  2215,  2000,  2293,  2023,  2138,
        13558,  6154,  3084,  6429,  3688,  2021,  2023,  2317,  5005,  3698,
         2003,  2126,  2205,  4251,  1045,  2064,  6684,  2963,  2505,  2746,
         2041,  1045,  2288,  1037,  2261,  2317,  5005,  6681,  1998,  2023,
         2028,  2003,  2011,  2521,  1996,  4251,  4355,  1996, 11300,  2029,
         2003,  2025, 12109,  2003,  3835,  1998,  5189,  1045,  2288,  2178,
         3604,  2317,  5005,  3698,  2006,  9733,  2008,  2246,  2066,  1037,
         2210, 16291,  1998,  2009,  1055,  3835,  1998,  5189,  1045,  2123,
         1056,  2113,  2339,  2023,  3698,  2003,  2061,  4251,  1045,  9038,
         9343,  2009,  2021,  2008,  1055,  2763,  2339,  2009,  2001, 15184,
          102,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0, 

In [20]:
outputs = model(
    input_ids=encoded_dataset["train"]["input_ids"][0].unsqueeze(0),
    labels=encoded_dataset["train"][0]["labels"].unsqueeze(0),
    attention_mask=encoded_dataset["train"][0]["attention_mask"].unsqueeze(0),
)
outputs

SequenceClassifierOutput(loss=tensor(0.6553, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), logits=tensor([[-0.5469,  0.1284,  0.5035, -0.1624,  0.2553,  0.0110, -0.0137, -0.4793,
         -0.5716, -0.0393, -0.9075,  0.4424, -0.1431,  0.7214, -0.1920,  0.1153,
         -0.2003,  0.3406,  0.1525, -0.7002,  0.0719,  0.0881,  0.7448,  0.2657,
         -0.1153, -0.5972, -0.2544,  0.0270, -0.6985, -0.3758, -0.5672]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [21]:
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [22]:
trainer.train()



  0%|          | 0/4375 [00:00<?, ?it/s]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 0.1837, 'learning_rate': 1.7714285714285717e-05, 'epoch': 0.57}


  0%|          | 0/188 [00:00<?, ?it/s]

{'eval_loss': 0.13290289044380188, 'eval_f1': 0.0, 'eval_roc_auc': 0.5, 'eval_accuracy': 0.0, 'eval_runtime': 8.8674, 'eval_samples_per_second': 169.158, 'eval_steps_per_second': 21.201, 'epoch': 1.0}
{'loss': 0.1333, 'learning_rate': 1.542857142857143e-05, 'epoch': 1.14}
{'loss': 0.1307, 'learning_rate': 1.3142857142857145e-05, 'epoch': 1.71}


  0%|          | 0/188 [00:00<?, ?it/s]

{'eval_loss': 0.12157846987247467, 'eval_f1': 0.0, 'eval_roc_auc': 0.5, 'eval_accuracy': 0.0, 'eval_runtime': 8.9721, 'eval_samples_per_second': 167.186, 'eval_steps_per_second': 20.954, 'epoch': 2.0}
{'loss': 0.1213, 'learning_rate': 1.0857142857142858e-05, 'epoch': 2.29}
{'loss': 0.1126, 'learning_rate': 8.571428571428571e-06, 'epoch': 2.86}


  0%|          | 0/188 [00:00<?, ?it/s]

{'eval_loss': 0.11099991202354431, 'eval_f1': 0.1046865489957395, 'eval_roc_auc': 0.5280333333333334, 'eval_accuracy': 0.05733333333333333, 'eval_runtime': 9.0744, 'eval_samples_per_second': 165.301, 'eval_steps_per_second': 20.718, 'epoch': 3.0}
{'loss': 0.1036, 'learning_rate': 6.285714285714286e-06, 'epoch': 3.43}
{'loss': 0.0994, 'learning_rate': 4.000000000000001e-06, 'epoch': 4.0}


  0%|          | 0/188 [00:00<?, ?it/s]

{'eval_loss': 0.10639268159866333, 'eval_f1': 0.1984126984126984, 'eval_roc_auc': 0.5573444444444444, 'eval_accuracy': 0.11666666666666667, 'eval_runtime': 8.8656, 'eval_samples_per_second': 169.194, 'eval_steps_per_second': 21.206, 'epoch': 4.0}
{'loss': 0.0919, 'learning_rate': 1.7142857142857145e-06, 'epoch': 4.57}


  0%|          | 0/188 [00:00<?, ?it/s]

{'eval_loss': 0.10529825091362, 'eval_f1': 0.226792662590328, 'eval_roc_auc': 0.5669444444444446, 'eval_accuracy': 0.136, 'eval_runtime': 8.8965, 'eval_samples_per_second': 168.606, 'eval_steps_per_second': 21.132, 'epoch': 5.0}
{'train_runtime': 776.1536, 'train_samples_per_second': 45.094, 'train_steps_per_second': 5.637, 'train_loss': 0.1194923828125, 'epoch': 5.0}


TrainOutput(global_step=4375, training_loss=0.1194923828125, metrics={'train_runtime': 776.1536, 'train_samples_per_second': 45.094, 'train_steps_per_second': 5.637, 'train_loss': 0.1194923828125, 'epoch': 5.0})

In [23]:
trainer.evaluate()

  0%|          | 0/188 [00:00<?, ?it/s]

{'eval_loss': 0.10529825091362,
 'eval_f1': 0.226792662590328,
 'eval_roc_auc': 0.5669444444444446,
 'eval_accuracy': 0.136,
 'eval_runtime': 8.8611,
 'eval_samples_per_second': 169.28,
 'eval_steps_per_second': 21.216,
 'epoch': 5.0}

# Eval

In [24]:
CONFIDENCE_THRESHOLD = 0.1


def inference(text):
    encoding = tokenizer(text, return_tensors="pt")
    encoding = {k: v.to(trainer.model.device) for k, v in encoding.items()}

    outputs = trainer.model(**encoding)

    logits = outputs.logits

    # apply sigmoid + threshold
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(logits.squeeze().cpu())
    predictions = np.zeros(probs.shape)
    predictions[np.where(probs >= CONFIDENCE_THRESHOLD)] = 1
    # turn predicted id's into actual label names
    predicted_labels = [
        id2label[idx] for idx, label in enumerate(predictions) if label == 1.0
    ]
    return predicted_labels

In [25]:
df_results["text"][0:1].apply(inference)

0    [furniture, home]
Name: text, dtype: object

In [26]:
ROWS_TO_EVALUATE = 100

df_results["predicted_class"] = df_results["text"][0:ROWS_TO_EVALUATE].apply(inference)
df_results["correct"] = df_results[0:ROWS_TO_EVALUATE].apply(
    lambda row: int(row["product_category"] in row["predicted_class"]), axis=1
)


def correct_discount(row):
    num_guesses = len(row["predicted_class"])

    if num_guesses > 1 and row["correct"] == 1:
        score = (0.85) ** num_guesses

        return score
    else:
        return row["correct"]


df_results["correct_w_discount"] = df_results[0:ROWS_TO_EVALUATE].apply(
    correct_discount, axis=1
)

In [27]:
df_results["correct"].sum() / ROWS_TO_EVALUATE

0.78

In [28]:
df_results["correct_w_discount"].sum() / ROWS_TO_EVALUATE

0.65068617328125

In [31]:
df_results[0:100]

Unnamed: 0,text,product_category,predicted_class,correct,correct_w_discount
0,i ll spend twice the amount of time boxing up the whole useless thing and send it back with a 1 star review arrived broken manufacturer defect two of the legs of the base were not completely formed so there was no way to insert the casters i unpackaged the entire chair and hardware before noticing this so i ll spend twice the amount of time boxing up the whole useless thing and send it back with a 1 star review of part of a chair i never got to sit in i will go so far as to include a picture...,furniture,"[baby_product, industrial_supplies, other]",0,0.0
1,not use able the cabinet dot were all detached from backing got me,home_improvement,"[baby_product, industrial_supplies, other]",0,0.0
2,the product is junk i received my first order of this product and it was broke so i ordered it again the second one was broke in more places than the first i can t blame the shipping process as it s shrink wrapped and boxed,home,"[baby_product, industrial_supplies, other]",0,0.0
3,fucking waste of money this product is a piece of shit do not buy doesn t work and then i try to call for customer support it won t take my number fucking rip off,wireless,"[baby_product, camera, industrial_supplies]",0,0.0
4,bubble went through 3 in one day doesn t fit correct and couldn t get bubbles out better without,pc,"[baby_product, industrial_supplies, other]",0,0.0
...,...,...,...,...,...
95,terrible don t waste your money terrible don t waste your money it s a jap knockoff of realtor and it totally sucks could connect to my router but after that no dice no web access brilliant scam well done,pc,"[baby_product, industrial_supplies, other]",0,0.0
96,cute but definitely for a small child my 6 cute but definitely for a small child my 6 year old grand daughter didn t want it because it wasn t a real violin,toy,"[baby_product, industrial_supplies, other, wireless]",0,0.0
97,sucks the product worked for maybe five minutes then it stopped and my iphone said is not compatible with iphone,wireless,"[baby_product, industrial_supplies, other]",0,0.0
98,buyer beware phone does not work and seller asked for pictures instead of replacing defective unit phone does not work when plugged in to an active land line no dial tone seller asked for pictures,office_product,"[automotive, baby_product, industrial_supplies, other]",0,0.0


# Save and load

In [29]:
### Save model (already saved)
#trainer.save_model("bert-finetuned-amazon-reviews-multi-train-100k")

In [9]:
model1 = AutoModelForSequenceClassification.from_pretrained(
    "bert-finetuned-amazon-reviews-multi-train-100k"
)
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

In [10]:
trainer1 = Trainer(model=model1, tokenizer=tokenizer)

In [12]:
trainer1.evaluate(encoded_dataset["validation"])

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


  0%|          | 0/1875 [00:00<?, ?it/s]

{'eval_loss': 0.0825805515050888,
 'eval_runtime': 89.4772,
 'eval_samples_per_second': 167.64,
 'eval_steps_per_second': 20.955}

In [35]:
text = "I love my new phone. It's really great! The screen is big and bright. The battery lasts a long time"


encoding = tokenizer(text, return_tensors="pt")
encoding = {k: v.to(trainer1.model.device) for k, v in encoding.items()}

outputs = trainer1.model(**encoding)

logits = outputs.logits

# apply sigmoid + threshold
sigmoid = torch.nn.Sigmoid()
probs = sigmoid(logits.squeeze().cpu())
predictions = np.zeros(probs.shape)
predictions[np.where(probs >= CONFIDENCE_THRESHOLD)] = 1
# turn predicted id's into actual label names
predicted_labels = [
    id2label[idx] for idx, label in enumerate(predictions) if label == 1.0
]

# Try on test data

In [36]:
df = pd.read_parquet("datasets/amazon_reviews_multi-test.parquet")

df = df[["review_title", "review_body", "product_category"]]

# le = LabelEncoder()

# df["product_class"] = le.fit_transform(df["product_category"])

for col in ["review_title", "review_body"]:
    df[col] = df[col].apply(text_normalization)


df["text"] = df["review_title"] + " " + df["review_body"]

df_test = df[["text", "product_category"]]

In [37]:
model1 = AutoModelForSequenceClassification.from_pretrained(
    "bert-finetuned-amazon-reviews-multi-train-100k"
)
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

trainer1 = Trainer(model=model1, tokenizer=tokenizer)

In [38]:
CONFIDENCE_THRESHOLD = 0.1


def inference(text):
    encoding = tokenizer(text, return_tensors="pt")
    encoding = {k: v.to(trainer1.model.device) for k, v in encoding.items()}

    outputs = trainer1.model(**encoding)

    logits = outputs.logits

    # apply sigmoid + threshold
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(logits.squeeze().cpu())
    predictions = np.zeros(probs.shape)
    predictions[np.where(probs >= CONFIDENCE_THRESHOLD)] = 1
    # turn predicted id's into actual label names
    predicted_labels = [
        id2label[idx] for idx, label in enumerate(predictions) if label == 1.0
    ]
    return predicted_labels

In [39]:
ROWS_TO_EVALUATE = len(df_test)

df_test["predicted_class"] = df_test["text"][0:ROWS_TO_EVALUATE].apply(inference)
df_test["correct"] = df_test[0:ROWS_TO_EVALUATE].apply(
    lambda row: int(row["product_category"] in row["predicted_class"]), axis=1
)


def correct_discount(row, muli_cat_guess_penalty=0.85):
    num_guesses = len(row["predicted_class"])

    if num_guesses > 1 and row["correct"] == 1:
        score = (muli_cat_guess_penalty) ** num_guesses

        return score
    else:
        return row["correct"]


df_test["correct_w_discount"] = df_test[0:ROWS_TO_EVALUATE].apply(
    correct_discount, axis=1
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["predicted_class"] = df_test["text"][0:ROWS_TO_EVALUATE].apply(inference)


In [40]:
print(
    f"Percent of predictions that include correct class: {round((df_test.correct.sum() / ROWS_TO_EVALUATE)*100, 2)}%"
)

Percent of predictions that include correct class: 95.4%


In [42]:
df_test

Unnamed: 0,text,product_category,predicted_class,correct,correct_w_discount
0,don t waste your time these are awful they are see through the fabric feels like tablecloth and they fit like children s clothing customer service did seem to be nice though but i regret missing my return date for these i wouldn t even donate them because the quality is so poor,apparel,"[apparel, automotive, baby_product, beauty, book, camera, digital_ebook_purchase, drugstore, electronics, furniture, grocery, home, home_improvement, industrial_supplies, jewelry, kitchen, lawn_and_garden, office_product, other, pc, pet_products, sports, toy, video_games, watch, wireless]",1,0.014618
1,one star i bought 4 and none of them worked yes i used new batteries,other,"[apparel, automotive, baby_product, beauty, book, camera, digital_ebook_purchase, drugstore, electronics, furniture, grocery, home, home_improvement, industrial_supplies, jewelry, kitchen, lawn_and_garden, office_product, other, pc, pet_products, sports, toy, video_games, watch, wireless]",1,0.014618
2,totally useless on first use it didn t heat up and now it doesn t work at all,other,"[apparel, automotive, baby_product, beauty, book, camera, digital_ebook_purchase, drugstore, electronics, furniture, grocery, home, home_improvement, industrial_supplies, jewelry, kitchen, lawn_and_garden, office_product, other, pc, pet_products, sports, toy, video_games, watch, wireless]",1,0.014618
3,gold filled earrings you want an honest answer i just returned from ups where i returned the farce of an earring set to amazon it did not look like what i saw on amazon only a baby would be able to wear the size of the earring they were so small the size of a pin head i at first thought amazon had forgotten to enclose them in the bag i didn t bother to take them out of the bag and you can have them back will never order another thing from your company a disgrace honest enough for you grandma,jewelry,"[apparel, automotive, baby_product, beauty, book, camera, digital_ebook_purchase, drugstore, electronics, furniture, grocery, home, home_improvement, industrial_supplies, jewelry, kitchen, lawn_and_garden, office_product, other, pc, pet_products, sports, toy, video_games, watch, wireless]",1,0.014618
4,poor container the glue works fine but the container is impossible to work with the cap doesn t come off without plyers and then won t go back on without a violent abrupt force involving both hands and a solid object desk drawer this happened even though i was careful to not gum up the lid or tapering snout,industrial_supplies,"[apparel, automotive, baby_product, beauty, book, camera, digital_ebook_purchase, drugstore, electronics, furniture, grocery, home, home_improvement, industrial_supplies, jewelry, kitchen, lawn_and_garden, office_product, other, pc, pet_products, sports, toy, video_games, watch, wireless]",1,0.014618
...,...,...,...,...,...
4995,very sleek it s still early but so far the scales seems like a good product it is very modern looking my wife got back from the dr s office and said their scale read 3 pounds lighter than ours so time to shop ours was old so maybe lost some degree of accuracy this inevifit seems to be accurate time will tell we like it hard to give products a 5 when they are still in the testing phase and there is always room for improvement i don t really like to have to use 4 aaa batteries but i ll get ove...,drugstore,"[apparel, automotive, baby_product, beauty, book, camera, digital_ebook_purchase, drugstore, electronics, furniture, grocery, home, home_improvement, industrial_supplies, jewelry, kitchen, lawn_and_garden, office_product, other, pc, pet_products, sports, toy, video_games, watch, wireless]",1,0.014618
4996,face mask a few times now and it works great it also had a pleasant smell will buy it i ve used this face mask a few times now and it works great it also had a pleasant smell will buy it again,beauty,"[apparel, automotive, baby_product, beauty, book, camera, digital_ebook_purchase, drugstore, electronics, furniture, grocery, home, home_improvement, industrial_supplies, jewelry, kitchen, lawn_and_garden, office_product, other, pc, pet_products, sports, toy, video_games, watch, wireless]",1,0.014618
4997,five stars great price good quality,home,"[apparel, automotive, baby_product, beauty, book, camera, digital_ebook_purchase, drugstore, electronics, furniture, grocery, home, home_improvement, industrial_supplies, jewelry, kitchen, lawn_and_garden, office_product, other, pc, pet_products, sports, toy, video_games, watch, wireless]",1,0.014618
4998,2016 f150 xlt fits my 2016 f150 xlt great now i dont have to listen to it hitting side to side while driving anymore,automotive,"[apparel, automotive, baby_product, beauty, book, camera, digital_ebook_purchase, drugstore, electronics, furniture, grocery, home, home_improvement, industrial_supplies, jewelry, kitchen, lawn_and_garden, office_product, other, pc, pet_products, sports, toy, video_games, watch, wireless]",1,0.014618
