In [1]:
from transformers import pipeline

pipe = pipeline("text-classification", model="vineetsharma/customer-support-intent-albert")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

tokenizer = AutoTokenizer.from_pretrained("vineetsharma/customer-support-intent-albert")
model = AutoModelForSequenceClassification.from_pretrained("vineetsharma/customer-support-intent-albert")

In [3]:
text = "Are there any eco-friendly clothing options available?"

In [4]:
result = pipe(text)
print(result)

[{'label': 'change_order', 'score': 0.7713922262191772}]


In [5]:
text = "What size should I choose if I'm between two sizes?"
result = pipe(text)
print(result)

[{'label': 'change_order', 'score': 0.4721727669239044}]


In [6]:
pip install datasets



In [7]:
from datasets import load_dataset, DatasetDict, Dataset
import pandas as pd

In [8]:
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

In [9]:
df = pd.read_csv('Training_Data.csv')

In [10]:
print(df.head(10))

                                                text                label
0   Can I return an item if it doesn't fit properly?     Customer Service
1        What's the latest fashion trend for summer?               Review
2  Are there any eco-friendly clothing options av...  Product Description
3  Can you recommend comfortable activewear for w...        Related Items
4  Is there a sale or discount section on the web...     Customer Service
5        What colors are popular for fall this year?               Review
6  Are there any sustainable and ethically produc...  Product Description
7            Can I track my order once it's shipped?     Customer Service
8  Do you have any clothing suitable for a formal...    Occasion specific
9  What size should I choose if I'm between two s...        Personal Info


In [11]:
label_mapping = {label: idx for idx, label in enumerate(df['label'].unique())}
df['label'] = df['label'].map(label_mapping)

In [12]:
dataset = Dataset.from_pandas(df)

In [13]:
tokenizer = AutoTokenizer.from_pretrained("vineetsharma/customer-support-intent-albert")
model = AutoModelForSequenceClassification.from_pretrained("vineetsharma/customer-support-intent-albert")

In [14]:
def preprocess_data(examples):
    return tokenizer(examples['text'], padding=True, truncation=True)

tokenized_dataset = dataset.map(preprocess_data, batched=True)

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

In [15]:
split_dataset = tokenized_dataset.train_test_split(test_size=0.1)
train_dataset = split_dataset['train']
eval_dataset = split_dataset['test']

In [16]:
!pip install accelerate -U



In [17]:
!pip install transformers[torch]



In [18]:
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    num_train_epochs=5,
    weight_decay=0.01,
)



In [19]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

In [20]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,No log,2.144397
2,No log,1.946576
3,No log,1.749663
4,No log,1.67973
5,No log,1.6317


TrainOutput(global_step=45, training_loss=1.5007259792751737, metrics={'train_runtime': 263.494, 'train_samples_per_second': 2.733, 'train_steps_per_second': 0.171, 'total_flos': 977002970400.0, 'train_loss': 1.5007259792751737, 'epoch': 5.0})

In [21]:
results = trainer.evaluate()
print(f"Evaluation results: {results}")

Evaluation results: {'eval_loss': 1.631699800491333, 'eval_runtime': 1.8285, 'eval_samples_per_second': 8.75, 'eval_steps_per_second': 0.547, 'epoch': 5.0}


In [22]:
def predict(texts):
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    outputs = model(**inputs)
    predictions = outputs.logits.argmax(dim=-1).tolist()
    inverse_label_mapping = {v: k for k, v in label_mapping.items()}
    predicted_labels = [inverse_label_mapping[label] for label in predictions]
    return predicted_labels

In [23]:
example = ["Can I return an item if it doesn't fit properly?", "Is there a sale or discount section on the website?",
           "Do you have any clothing suitable for a formal event?", "Return policy of sale items?"]

In [24]:
predicted_labels = predict(example)

In [25]:
for text, label in zip(example, predicted_labels):
    print(f"Text: {text} => Predicted Label: {label}")

Text: Can I return an item if it doesn't fit properly? => Predicted Label: Customer Service
Text: Is there a sale or discount section on the website? => Predicted Label: Customer Service
Text: Do you have any clothing suitable for a formal event? => Predicted Label: Occasion specific
Text: Return policy of sale items? => Predicted Label: Customer Service


In [26]:
model.save_pretrained('./MODEL')
tokenizer.save_pretrained('./MODEL')

('./MODEL/tokenizer_config.json',
 './MODEL/special_tokens_map.json',
 './MODEL/tokenizer.json')

In [27]:
import json

# Save the label mapping
with open("./MODEL/label_mapping.json", "w") as file:
    json.dump(label_mapping, file)

In [28]:
import json

# Load the label mapping
with open("./MODEL/label_mapping.json", "r") as file:
    label_mapping = json.load(file)

# Ensure the inverse label mapping is consistent
inverse_label_mapping = {v: k for k, v in label_mapping.items()}

In [29]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("./MODEL")
model = AutoModelForSequenceClassification.from_pretrained("./MODEL")