In [2]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.4.1-py3-none-any.whl.metadata (20 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.6-py3-none-any.whl.metadata (5.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->torchmetrics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->torchmetrics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->torchmetrics)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.10.0->torchmetrics)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.10.0->torchmetrics)
  Usin

In [53]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torchmetrics import Accuracy, Precision, Recall
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [55]:
df = pd.read_csv('/content/customer_support_tickets.csv')

df['Ticket Subject'] = df['Ticket Subject'].fillna('')

word2idx = {'<PAD>': 0}
max_len = 100

def text_to_sequence(text):
    return [word2idx.get(word, len(word2idx)) for word in text.split()[:max_len]]

df['text_seq'] = df['Ticket Subject'].apply(text_to_sequence)

df['text_seq'] = df['text_seq'].apply(lambda x: x + [0] * (max_len - len(x)))

In [56]:
# Encode labels
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['Ticket Type'])

In [57]:
# Split data into training and test sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [58]:
# Convert data to tensors
train_data = torch.tensor(train_df['text_seq'].tolist(), dtype=torch.long)
train_labels = torch.tensor(train_df['label'].values, dtype=torch.long)
test_data = torch.tensor(test_df['text_seq'].tolist(), dtype=torch.long)
test_labels = torch.tensor(test_df['label'].values, dtype=torch.long)

In [59]:
# Create TensorDataset and DataLoader
train_dataset = TensorDataset(train_data, train_labels)
test_dataset = TensorDataset(test_data, test_labels)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [60]:
# Initialize parameters
vocab_size = len(word2idx) + 1
target_size = len(label_encoder.classes_)  # Number of unique classes
embedding_dim = 64

In [61]:
# Create an instance of the TicketClassifier class
model = TicketClassifier(vocab_size, embedding_dim, target_size)

In [62]:
# Define the optimizer and loss function
lr = 0.05
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [64]:
# Training the model
epochs = 3
model.train()
for i in range(epochs):
    running_loss = 0.0
    num_processed = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_processed += len(inputs)
    print(f"Epoch: {i+1}, Loss: {running_loss/num_processed}")
print("Training finished.")

Epoch: 1, Loss: 0.0040395170120295565
Epoch: 2, Loss: 0.004040936367978029
Epoch: 3, Loss: 0.004038974561374566
Training finished.


In [65]:
# Initialize metrics
accuracy_metric = Accuracy(task='multiclass', num_classes=target_size)
precision_metric = Precision(task='multiclass', num_classes=target_size, average=None)
recall_metric = Recall(task='multiclass', num_classes=target_size, average=None)

In [66]:
# Evaluate the model on the test set
model.eval()
predicted = []

for inputs, labels in test_loader:
    output = model(inputs)
    cat = torch.argmax(output, dim=-1)
    predicted.extend(cat.tolist())
    accuracy_metric(cat, labels)
    precision_metric(cat, labels)
    recall_metric(cat, labels)

In [67]:
# Calculate metrics
accuracy = accuracy_metric.compute().item()
precision = precision_metric.compute().tolist()
recall = recall_metric.compute().tolist()

In [68]:
# Output results
print('Accuracy:', accuracy)
print('Precision (per class):', precision)
print('Recall (per class):', recall)

Accuracy: 0.19303423166275024
Precision (per class): [0.0, 0.19303423166275024, 0.0, 0.0, 0.0]
Recall (per class): [0.0, 1.0, 0.0, 0.0, 0.0]
