<a href="https://colab.research.google.com/github/Imama-Kainat/ByteWise-MLDL/blob/main/Service_Desk_Ticket_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install PyTorch (optional, Google Colab has PyTorch pre-installed)
!pip install torch
!pip install numpy




In [2]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import numpy as np

# Generate random data (simulating tokenized texts and labels)
texts_train = np.random.randint(0, 100, (1000, 50))  # 1000 samples, each of length 50
labels_train = np.random.randint(0, 5, 1000)         # 1000 labels for training
texts_test = np.random.randint(0, 100, (200, 50))    # 200 test samples
labels_test = np.random.randint(0, 5, 200)           # 200 labels for testing

# Create PyTorch Dataset objects
train_data = TensorDataset(torch.tensor(texts_train, dtype=torch.long), torch.tensor(labels_train, dtype=torch.long))
test_data = TensorDataset(torch.tensor(texts_test, dtype=torch.long), torch.tensor(labels_test, dtype=torch.long))

# Create DataLoader objects for batching
batch_size = 400
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

# Define the CNN model for text classification
class TicketClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim, target_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.conv = nn.Conv1d(embed_dim, embed_dim, kernel_size=3, stride=1, padding=1)
        self.fc = nn.Linear(embed_dim, target_size)

    def forward(self, text):
        embedded = self.embedding(text).permute(0, 2, 1)  # Shape: (batch_size, embed_dim, seq_len)
        conved = torch.relu(self.conv(embedded)).mean(dim=2)  # Reduce over the sequence length
        return self.fc(conved)

# Hyperparameters and model instantiation
vocab_size = 101        # Vocabulary size (tokens range from 0 to 100)
target_size = 5         # Number of classes
embedding_dim = 64      # Embedding dimension

model = TicketClassifier(vocab_size, embedding_dim, target_size)

# Loss and optimizer
lr = 0.05
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Training loop
epochs = 3
model.train()
for epoch in range(epochs):
    running_loss, num_processed = 0, 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_processed += len(inputs)
    print(f"Epoch: {epoch+1}, Loss: {running_loss/num_processed}")

# Metrics calculation function
def calculate_metrics(predictions, labels, num_classes):
    accuracy = (predictions == labels).sum().item() / len(labels)
    precision, recall = [], []
    for i in range(num_classes):
        tp = ((predictions == i) & (labels == i)).sum().item()
        fp = ((predictions == i) & (labels != i)).sum().item()
        fn = ((predictions != i) & (labels == i)).sum().item()
        precision.append(tp / (tp + fp) if tp + fp != 0 else 0.0)
        recall.append(tp / (tp + fn) if tp + fn != 0 else 0.0)
    return accuracy, precision, recall

# Model evaluation on test data
model.eval()
all_predictions, all_labels = [], []

with torch.no_grad():
    for inputs, labels in test_loader:
        output = model(inputs)
        cat = torch.argmax(output, dim=-1)
        all_predictions.extend(cat.tolist())
        all_labels.extend(labels.tolist())

# Convert to tensors for metrics calculation
all_predictions = torch.tensor(all_predictions)
all_labels = torch.tensor(all_labels)

# Calculate accuracy, precision, and recall
accuracy, precision, recall = calculate_metrics(all_predictions, all_labels, target_size)
print('Accuracy:', accuracy)
print('Precision (per class):', precision)
print('Recall (per class):', recall)


Epoch: 1, Loss: 0.0055198919773101806
Epoch: 2, Loss: 0.004913164615631104
Epoch: 3, Loss: 0.0047777019739151
Accuracy: 0.185
Precision (per class): [0.19230769230769232, 0.0, 0.0, 0.1774193548387097, 0.14285714285714285]
Recall (per class): [0.625, 0.0, 0.0, 0.2391304347826087, 0.02631578947368421]
