In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sentence_transformers import SentenceTransformer
import torch
from torch.utils.data import Dataset, DataLoader

In [2]:
# Read the intents table; later cells use its `tag` and `patterns` columns.
df = pd.read_csv(filepath_or_buffer="./df_intents.csv")

In [3]:
# Turn each tag string into an integer class id. The fitted encoder stays in
# `label_encoder`; the ids are stored in a new `tag_encoded` column.
label_encoder = LabelEncoder().fit(df['tag'])
df['tag_encoded'] = label_encoder.transform(df['tag'])

In [4]:
# Embed every pattern string with the pretrained 'all-MiniLM-L6-v2' model.
pattern_texts = df['patterns'].tolist()
transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = transformer_model.encode(pattern_texts, show_progress_bar=True)

Batches:   0%|          | 0/21 [00:00<?, ?it/s]

In [7]:
# Hold out 20% of the examples for validation; the fixed seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    embeddings,
    df['tag_encoded'],
    random_state=42,
    test_size=0.2,
)

In [13]:
# Inspect the validation labels; these are the integer codes taken from `tag_encoded`.
y_val

630    39
271    78
135     0
483     9
90     43
       ..
531     7
411    68
235    65
382     2
18     44
Name: tag_encoded, Length: 133, dtype: int64

In [14]:
class TagDataset(Dataset):
    """Dataset pairing one embedding vector with one integer class label.

    Args:
        embeddings: Array-like of equally sized feature vectors, one per
            sample (for example a 2-D NumPy array or a list of 1-D arrays).
        labels: Array-like of integer class ids, one per sample (pandas
            Series, NumPy array, or plain list).

    Raises:
        ValueError: If `embeddings` and `labels` have different lengths.
    """

    def __init__(self, embeddings, labels):
        # Build one float tensor for all rows instead of one tensor per row.
        # torch.tensor copies, so the dataset never aliases the caller's arrays.
        self.embeddings = torch.tensor(np.asarray(embeddings), dtype=torch.float)
        # np.asarray accepts Series, ndarray and list alike; the previous
        # `labels.tolist()` failed for anything without a `.tolist()` method.
        self.labels = torch.tensor(np.asarray(labels), dtype=torch.long)

        # Fail at construction time rather than with an IndexError mid-epoch.
        if len(self.embeddings) != len(self.labels):
            raise ValueError(
                f"embeddings and labels must have the same length, "
                f"got {len(self.embeddings)} and {len(self.labels)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the `(embedding, label)` pair stored at position `idx`."""
        return self.embeddings[idx], self.labels[idx]

# Wrap each split in a TagDataset, then batch it; only the training batches are shuffled.
train_dataset, val_dataset = TagDataset(X_train, y_train), TagDataset(X_val, y_val)

train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

In [24]:
class BiLSTMClassifier(torch.nn.Module):
    """Bidirectional LSTM followed by a linear layer that produces class logits.

    Args:
        input_size: Width of each input feature vector.
        hidden_size: Number of hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = torch.nn.LSTM(
            input_size, hidden_size, num_layers, batch_first=True, bidirectional=True
        )
        # Forward and backward outputs are concatenated, hence hidden_size * 2.
        self.fc = torch.nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        """Map a batch of vectors to logits.

        Args:
            x: Tensor of shape (batch, input_size).

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        # Present each vector to the LSTM as a sequence of length 1.
        x = x.unsqueeze(1)
        # No explicit (h0, c0): nn.LSTM starts from zero states by default.
        # The hand-built float32 zeros were redundant, were allocated on the
        # CPU before being moved, and did not follow the input's dtype.
        out, _ = self.lstm(x)
        # With a single timestep, index -1 is the only position in the sequence.
        return self.fc(out[:, -1, :])


In [27]:
# Hyperparameters
# Input width and class count are read from the data, so the model cannot
# drift out of sync with the embeddings or the fitted label encoder.
embedding_dim = X_train.shape[1]
hidden_size = 128
num_layers = 2
num_classes = len(label_encoder.classes_)  # one logit per distinct tag
num_epochs = 10

# Seed torch so weight initialisation (and DataLoader shuffling, which draws
# from the same global generator) is repeatable across runs.
torch.manual_seed(42)

model = BiLSTMClassifier(embedding_dim, hidden_size, num_layers, num_classes)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [26]:
# Per-epoch history for both splits, one entry appended per epoch.
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

# Default 'mean' reduction; _run_epoch relies on it when re-weighting by batch size.
criterion = torch.nn.CrossEntropyLoss()


def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one full pass over `loader` and return `(mean_loss, accuracy)`.

    Args:
        model: Module mapping a batch of embeddings to class logits.
        loader: Iterable yielding `(embeddings, labels)` batches.
        criterion: Loss returning the mean loss over a batch.
        optimizer: If given, the pass trains the model; if None, the pass
            only evaluates and gradients are disabled.

    Returns:
        Tuple of the per-sample mean loss and the fraction of correct predictions.

    Raises:
        ValueError: If `loader` yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)  # train(False) is equivalent to eval()

    loss_sum, n_correct, n_samples = 0.0, 0, 0
    with torch.set_grad_enabled(is_training):
        # Batch variables carry distinct names so the notebook-level
        # `embeddings` matrix is not overwritten by the last batch.
        for batch_embeddings, batch_labels in loader:
            batch_labels = batch_labels.long()  # class indices, shape (batch,)

            if is_training:
                optimizer.zero_grad()
            logits = model(batch_embeddings)
            loss = criterion(logits, batch_labels)
            if is_training:
                loss.backward()
                optimizer.step()

            batch_size = batch_labels.size(0)
            # Weight by batch size so a short final batch is not over-counted.
            loss_sum += loss.item() * batch_size
            n_correct += (logits.argmax(dim=1) == batch_labels).sum().item()
            n_samples += batch_size

    if n_samples == 0:
        raise ValueError("loader produced no samples")
    return loss_sum / n_samples, n_correct / n_samples


for epoch in range(num_epochs):
    avg_train_loss, train_accuracy = _run_epoch(model, train_loader, criterion, optimizer)
    avg_val_loss, val_accuracy = _run_epoch(model, val_loader, criterion)

    # Record both splits; the training history was previously never filled.
    train_losses.append(avg_train_loss)
    train_accuracies.append(train_accuracy)
    val_losses.append(avg_val_loss)
    val_accuracies.append(val_accuracy)

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, '
          f'Train Acc: {train_accuracy:.4f}, Val Acc: {val_accuracy:.4f}')


ValueError: Target size (torch.Size([32, 1])) must be the same as input size (torch.Size([32, 80]))