# Setup

### Importing data

In [None]:
import pandas as pd 

# Load the raw support-ticket export that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer="./customer_support_tickets.csv")

# Preview the first three rows to confirm the file parsed as expected.
df.head(n=3)

## 

# Preprocessing Data

### Selecting appropriate columns

In [None]:
# Columns carried forward: the two free-text fields that are later merged
# into the model input, and the two categorical fields used as targets.
legacy_cols = ["Ticket Subject", "Ticket Description", "Ticket Priority", "Ticket Type"]

# Take an explicit copy so that later column assignments act on an
# independent frame instead of a selection of the loaded one (this is what
# triggers pandas' SettingWithCopyWarning otherwise).
df = df[legacy_cols].copy()
df.head(3)

### Replacing the `{product_purchased}` placeholder with a `[NULL]` token

In [None]:
# Swap the unfilled template placeholder for a neutral token.
# regex=False forces a literal match: the braces are regex metacharacters and
# older pandas versions treat the pattern as a regular expression by default.
description = df["Ticket Description"].str.replace(
    "{product_purchased}", "[NULL]", regex=False
)

# Merge subject and description into a single model input column, then drop
# the two source columns. A new frame is produced instead of mutating in place.
df = df.assign(text=df["Ticket Subject"] + " | " + description).drop(
    columns=["Ticket Subject", "Ticket Description"]
)

# Show the first merged text (positional access avoids chained label indexing).
df["text"].iloc[0]

In [None]:
# Character count of every merged ticket text.
text_lengths = df["text"].str.len()

# Summary statistics used to choose a sequence-length cap for tokenization.
average_length, min_length, max_length = (
    text_lengths.mean(),
    text_lengths.min(),
    text_lengths.max(),
)

(average_length, min_length, max_length)

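# Observed output: (np.float64(287.25162356830793), np.int64(149), np.int64(390)).
# These are character counts (mean, min, max); the longest text is 390 characters, so a tokenizer max_length of 400 tokens should comfortably cover every ticket.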

### Tokenization 

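One-hot encoding represents each label as a target distribution that cross-entropy loss can compare against the predicted distribution; an example one-hot matrix is shown below. Note that the training code further down passes integer class indices to `nn.CrossEntropyLoss`, which accepts them directly, so the labels are never explicitly one-hot encoded.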

array([[1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 1., 0.],
       ...,
       [0., 1., 0., 0.],
       [0., 0., 0., 1.],
       [0., 1., 0., 0.]])

In [None]:
!pip install torch torchvision scikit-learn transformers --quiet

In [None]:
from sklearn.preprocessing import OneHotEncoder
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from transformers import BertTokenizer, BertModel
from sklearn.preprocessing import OneHotEncoder
import torch.nn.functional as F

In [None]:
# Distinct values of the priority column; these become the priority class labels.
df["Ticket Priority"].unique()

In [None]:
# Distinct values of the ticket-type column; these become the type class labels.
df["Ticket Type"].unique()

## Model Building


#### Tokenization / Encoding

In [None]:
# Label -> integer id lookups. Ids follow the order in which each label first
# appears in its column.
priority_map = {
    name: position for position, name in enumerate(df["Ticket Priority"].unique())
}
type_map = {
    name: position for position, name in enumerate(df["Ticket Type"].unique())
}

# Reverse lookups (integer id -> label). Iterating a dict yields its keys in
# insertion order, which is exactly the id order assigned above.
priority_map_idx_to_label = dict(enumerate(priority_map))
type_map_idx_to_label = dict(enumerate(type_map))

# Attach the integer targets to the frame.
df["priority_label"] = df["Ticket Priority"].map(priority_map)
df["type_label"] = df["Ticket Type"].map(type_map)

# Tokenize every text with the BERT WordPiece tokenizer.
# The result holds input_ids, attention_mask and token_type_ids.
# NOTE(review): `tokenized` is not used by any other cell visible here —
# confirm whether it is still needed.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
tokenized = tokenizer(
    df["text"].tolist(),
    padding=True,
    truncation=True,
    max_length=400,
    return_tensors="pt",
)

print(priority_map_idx_to_label)
print(type_map_idx_to_label)

#### Pytorch Dataset Object

In [None]:
class TicketDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenized ticket text with two label sets.

    Args:
        encodings: Mapping from field name (e.g. ``input_ids``) to an
            indexable container holding one entry per example; the tokenizer
            output is passed here.
        priority_labels: Integer priority class ids, one per example.
        type_labels: Integer ticket-type class ids, one per example.

    Raises:
        ValueError: If the two label sequences, or any encoding field, do not
            all describe the same number of examples.
    """

    def __init__(self, encodings, priority_labels, type_labels):
        self.encodings = encodings  # tokenized
        self.priority_labels = torch.tensor(priority_labels, dtype=torch.long)
        self.type_labels = torch.tensor(type_labels, dtype=torch.long)

        # Fail fast on misaligned inputs instead of surfacing an IndexError
        # (or silently pairing text with the wrong label) during training.
        expected = len(self.priority_labels)
        if len(self.type_labels) != expected:
            raise ValueError(
                f"type_labels has {len(self.type_labels)} entries, "
                f"expected {expected} to match priority_labels"
            )
        for key, val in self.encodings.items():
            if len(val) != expected:
                raise ValueError(
                    f"encodings[{key!r}] has {len(val)} entries, "
                    f"expected {expected} to match the labels"
                )

    def __getitem__(self, idx):
        """Return one example as a dict of encoding fields plus both labels."""
        item = {key: val[idx] for key, val in self.encodings.items()}
        item["priority_labels"] = self.priority_labels[idx]
        item["type_labels"] = self.type_labels[idx]
        return item

    def __len__(self):
        """Return the number of examples (the number of priority labels)."""
        return len(self.priority_labels)


#### Splitting

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the tickets for validation; the fixed random_state keeps the
# split identical across runs.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)


def _encode_texts(frame):
    """Tokenize the ``text`` column of ``frame`` into padded PyTorch tensors.

    Uses the same 400-token cap as the full-corpus tokenization so every
    tokenizer call in the notebook truncates at the same length.
    """
    return tokenizer(
        list(frame["text"]),
        padding=True,
        truncation=True,
        max_length=400,
        return_tensors="pt",
    )


train_encodings = _encode_texts(train_df)
val_encodings = _encode_texts(val_df)

# Pair each split's encodings with its two integer label columns.
train_dataset = TicketDataset(
    train_encodings,
    train_df["priority_label"].tolist(),
    train_df["type_label"].tolist(),
)
val_dataset = TicketDataset(
    val_encodings,
    val_df["priority_label"].tolist(),
    val_df["type_label"].tolist(),
)

#### Printing a sample row 

In [None]:

# Pull one training example to confirm the dataset yields the expected fields.
sample = train_dataset[0]

print(sample.keys())

# Convert the scalar label tensors to plain ints, then translate them back to
# their human-readable names via the reverse lookups.
priority_idx = sample["priority_labels"].item()
type_idx = sample["type_labels"].item()

print("Priority Label (y1):", priority_idx, "--> ", priority_map_idx_to_label[priority_idx])
print("Type Label (y2):", type_idx, "--> ", type_map_idx_to_label[type_idx])

#### Multi-headed BERT Model

In [None]:
from transformers import BertModel
import torch.nn as nn

class MultiTaskBERT(nn.Module):
    """Pretrained BERT encoder shared by two linear classification heads.

    Args:
        hidden_size: Input width of both heads; it must match the width of
            the encoder's pooled output.
        num_priorities: Number of output logits of the priority head.
        num_types: Number of output logits of the ticket-type head.
    """

    def __init__(self, hidden_size=768, num_priorities=4, num_types=5):
        super().__init__()
        # Shared encoder loaded from the "bert-base-uncased" checkpoint.
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        # Dropout applied once to the pooled representation feeding both heads.
        self.dropout = nn.Dropout(p=0.3)
        # One linear classifier per task.
        self.priority_head = nn.Linear(in_features=hidden_size, out_features=num_priorities)
        self.type_head = nn.Linear(in_features=hidden_size, out_features=num_types)

    def forward(self, input_ids, attention_mask):
        """Return ``(priority_logits, type_logits)`` for a batch of token ids."""
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        features = self.dropout(encoded.pooler_output)
        priority_logits = self.priority_head(features)
        type_logits = self.type_head(features)
        return priority_logits, type_logits


# Index -> label maps recorded for reference when reading the head outputs
# (presumably copied from the printed label maps — re-check if the data changes).
"""
{0: 'Critical', 1: 'Low', 2: 'High', 3: 'Medium'}
{0: 'Technical issue', 1: 'Billing inquiry', 2: 'Cancellation request', 3: 'Product inquiry', 4: 'Refund request'}
"""

In [None]:
from torch.utils.data import DataLoader

# Training batches are drawn in shuffled order; validation batches keep the
# dataset order (shuffle=False is the DataLoader default, spelled out here).
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=16, shuffle=False)

In [None]:
import torch
from torch.optim import AdamW

# Seed PyTorch's global RNG so the randomly initialised classification heads
# (and other RNG-driven steps) start from a fixed state on every run.
torch.manual_seed(42)

# Prefer the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Size each head from its label map instead of hard-coding 4 and 5, so the
# model cannot drift out of sync with the classes actually present in the data.
model = MultiTaskBERT(
    num_priorities=len(priority_map),
    num_types=len(type_map),
).to(device)

optimizer = AdamW(model.parameters(), lr=2e-5)
# The same criterion instance is applied to both heads in the training loop.
loss_fn = nn.CrossEntropyLoss()

In [None]:

# Fine-tune for three epochs, optimising the sum of the two task losses.
for epoch in range(3):
    model.train()
    total_loss = 0

    for batch in train_loader:
        optimizer.zero_grad()

        # Move the fields the model and the losses need onto the target device.
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        priority_labels = batch["priority_labels"].to(device)
        type_labels = batch["type_labels"].to(device)

        priority_logits, type_logits = model(input_ids, attention_mask)

        # priority_logits: (batch_size, num_priorities)
        # type_logits:     (batch_size, num_types)
        # CrossEntropyLoss is applied to each head against its integer labels.
        loss1 = loss_fn(priority_logits, priority_labels)
        loss2 = loss_fn(type_logits, type_labels)
        loss = loss1 + loss2  # both tasks weighted equally

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean per-batch loss: the raw sum scales with the number of
    # batches and so is not comparable across dataset or batch sizes.
    # max(..., 1) guards against an empty loader.
    avg_loss = total_loss / max(len(train_loader), 1)
    print(f"Epoch {epoch+1}, Avg Loss: {avg_loss:.4f}")