In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchgpipe import GPipe
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import time
import psutil

In [None]:
# Constants shared by the model, data-loading and training cells.

# --- Model architecture ---
INPUT_DIM = 768     # feature size fed to the LSTM at each step
HIDDEN_DIM = 256    # LSTM hidden size; also the input width of the linear head
NUM_LAYERS = 2      # number of stacked LSTM layers
DROPOUT = 0.5       # dropout argument given to nn.LSTM
OUTPUT_DIM = 33     # output width of the linear head (one logit per class)

# --- Optimisation ---
BATCH_SIZE = 64         # samples per DataLoader batch
LEARNING_RATE = 1e-3    # Adam step size
NUM_EPOCHS = 50         # full passes over the training loader

In [None]:
# Define your model
class LSTMModel(nn.Module):
    """LSTM encoder followed by a linear classification head.

    All sizes are taken from the module-level constants ``INPUT_DIM``,
    ``HIDDEN_DIM``, ``NUM_LAYERS``, ``DROPOUT`` and ``OUTPUT_DIM``.
    """

    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=INPUT_DIM,
            hidden_size=HIDDEN_DIM,
            num_layers=NUM_LAYERS,
            dropout=DROPOUT,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=HIDDEN_DIM, out_features=OUTPUT_DIM)

    def forward(self, x):
        """Run the LSTM and classify from its output at the final timestep.

        Assumes ``x`` is ``(batch, seq, INPUT_DIM)`` because the LSTM is built
        with ``batch_first=True`` -- TODO confirm against the stored embeddings.
        """
        sequence_out, _state = self.lstm(x)
        final_step = sequence_out[:, -1, :]  # keep only the last timestep
        return self.fc(final_step)

In [None]:
# Dataset class
class EmbeddingDataset(Dataset):
    """Map-style dataset pairing precomputed embeddings with integer class labels.

    Args:
        embeddings: Sequence of per-sample embedding arrays (wrapped with ``np.array``).
        labels: Sequence of integer-encoded class labels, one per embedding.

    Raises:
        ValueError: If ``embeddings`` and ``labels`` do not have the same length.
    """

    def __init__(self, embeddings, labels):
        self.embeddings = np.array(embeddings)
        self.labels = np.array(labels)
        # Fail fast on misaligned inputs instead of silently truncating to
        # len(labels) or hitting an IndexError in the middle of an epoch.
        if len(self.embeddings) != len(self.labels):
            raise ValueError(
                f"embeddings and labels must have the same length, "
                f"got {len(self.embeddings)} and {len(self.labels)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(embedding, label)`` as a float32 tensor and an int64 scalar tensor."""
        features = torch.tensor(self.embeddings[idx], dtype=torch.float32)
        # nn.CrossEntropyLoss needs int64 class indices; convert explicitly so the
        # target dtype does not depend on the integer width the encoder produced.
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return features, label

In [None]:
# Load data and encode labels
def load_data():
    """Read the three embedding splits and wrap each one in a DataLoader.

    The label encoder is fitted on the training split's ``source`` column and
    then applied unchanged to the test and validation splits.

    Returns:
        tuple: ``(train_loader, test_loader, val_loader)``; only the training
        loader shuffles.
    """
    split_paths = ('train_embeddings.h5', 'test_embeddings.h5', 'val_embeddings.h5')
    train_df, test_df, val_df = (pd.read_hdf(path) for path in split_paths)

    encoder = LabelEncoder()
    train_df['source'] = encoder.fit_transform(train_df['source'])
    for held_out_df in (test_df, val_df):
        held_out_df['source'] = encoder.transform(held_out_df['source'])

    def _as_loader(frame, shuffle):
        # One split -> EmbeddingDataset -> DataLoader with the shared batch size.
        dataset = EmbeddingDataset(
            np.array(frame['gpt2_embeddings']),
            np.array(frame['source']),
        )
        return DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=shuffle)

    return (
        _as_loader(train_df, True),
        _as_loader(test_df, False),
        _as_loader(val_df, False),
    )

train_loader, test_loader, val_loader = load_data()


class _LSTMLastStep(nn.Module):
    """Pipeline stage that runs an LSTM and emits only its final-timestep output.

    ``nn.LSTM`` returns ``(output, (h_n, c_n))``; this wrapper forwards just
    ``output[:, -1, :]`` so a single tensor flows on to the next stage.
    """

    def __init__(self, lstm):
        super().__init__()
        self.lstm = lstm

    def forward(self, x):
        output, _ = self.lstm(x)
        return output[:, -1, :]  # Use last timestep


# Initialize model and wrap with GPipe.
# GPipe only partitions nn.Sequential containers (anything else raises
# TypeError), and balance=[1, 1] needs exactly two top-level layers. Re-express
# LSTMModel as two sequential stages that reuse its own sub-modules:
#   stage 0 (cuda:0): LSTM + last-timestep selection
#   stage 1 (cuda:1): linear classification head
base_model = LSTMModel()
pipeline_stages = nn.Sequential(_LSTMLastStep(base_model.lstm), base_model.fc)
devices = ['cuda:0', 'cuda:1']  # Define the devices
model = GPipe(pipeline_stages, balance=[1, 1], chunks=8, devices=devices)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

In [None]:

# Train and evaluate
def train_and_evaluate():
    """Train the pipelined model for ``NUM_EPOCHS`` epochs, validating after each.

    Relies on the module-level ``model``, ``optimizer``, ``criterion``,
    ``devices``, ``train_loader`` and ``val_loader``.

    Returns:
        list[tuple]: One record per epoch, in order:
        ``(epoch, total_loss, acc, prec, rec, f1, gpu_memory, system_memory,
        elapsed_time)``. ``epoch`` is zero-based, ``total_loss`` is the sum of
        per-batch losses, the classification metrics are macro-averaged over
        the validation set, memory figures are in GB, and ``elapsed_time`` is
        the training-only wall time in seconds (validation excluded).
    """
    metric_records = []
    for epoch in range(NUM_EPOCHS):
        # perf_counter is monotonic, so the duration cannot be skewed by
        # wall-clock adjustments during a long run.
        start_time = time.perf_counter()
        total_loss = 0.0
        model.train()
        for data, labels in train_loader:
            data = data.to(devices[0])  # Move data to the first device
            labels = labels.to(devices[1])  # Labels to the last device in pipeline
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        elapsed_time = time.perf_counter() - start_time
        # Probe the first pipeline device via `devices` rather than a second
        # hard-coded device string.
        gpu_memory = torch.cuda.memory_allocated(devices[0]) / (1024 ** 3)  # GPU memory in GB
        system_memory = psutil.virtual_memory().used / (1024 ** 3)  # System memory in GB

        # Validation
        model.eval()
        val_preds, val_labels = [], []
        with torch.no_grad():
            for data, labels in val_loader:
                outputs = model(data.to(devices[0]))
                val_preds.extend(outputs.argmax(dim=1).tolist())
                # Labels are only compared on the host, so read them straight
                # from the CPU batch instead of copying to the GPU and back.
                val_labels.extend(labels.tolist())

        acc = accuracy_score(val_labels, val_preds)
        # zero_division=0 yields the same score as the default for classes that
        # are never predicted (or absent), without an undefined-metric warning.
        prec = precision_score(val_labels, val_preds, average='macro', zero_division=0)
        rec = recall_score(val_labels, val_preds, average='macro', zero_division=0)
        f1 = f1_score(val_labels, val_preds, average='macro', zero_division=0)

        metric_records.append((epoch, total_loss, acc, prec, rec, f1, gpu_memory, system_memory, elapsed_time))
        print(f'Epoch {epoch+1}: Loss={total_loss:.4f}, Accuracy={acc:.4f}, Precision={prec:.4f}, Recall={rec:.4f}, F1={f1:.4f}')
        print(f'GPU Memory: {gpu_memory} GB, System Memory: {system_memory} GB, Elapsed Time: {elapsed_time} sec')

    return metric_records

metrics = train_and_evaluate()

In [None]:
# Plotting function
def plot_metrics(metrics):
    """Draw a three-panel summary of a training run.

    Args:
        metrics: Iterable of 9-tuples ``(epoch, loss, accuracy, precision,
            recall, f1, gpu_gb, system_gb, seconds)``, one per epoch, as
            returned by ``train_and_evaluate``.

    Panels, left to right: loss and classification scores, GPU usage, and
    training time per epoch. The system-memory column is unpacked but not plotted.
    """
    (epochs, losses, accuracies, precisions, recalls,
     f1_scores, gpu_usages, _memory_usages, times) = zip(*metrics)

    _fig, (ax_scores, ax_gpu, ax_time) = plt.subplots(1, 3, figsize=(15, 5))

    # Left panel: every per-epoch score on shared axes.
    score_series = (
        (losses, 'Loss'),
        (accuracies, 'Accuracy'),
        (precisions, 'Precision'),
        (recalls, 'Recall'),
        (f1_scores, 'F1 Score'),
    )
    for values, series_label in score_series:
        ax_scores.plot(epochs, values, label=series_label)
    ax_scores.set_title('Training Metrics')

    # Middle panel: GPU memory recorded at the end of each epoch.
    ax_gpu.plot(epochs, gpu_usages, label='GPU Usage (GB)')
    ax_gpu.set_title('GPU Usage')

    # Right panel: time spent per epoch.
    ax_time.plot(epochs, times, label='Training Time (s)')
    ax_time.set_title('Training Time per Epoch')

    for panel in (ax_scores, ax_gpu, ax_time):
        panel.set_xlabel('Epoch')
        panel.legend()

    plt.show()

plot_metrics(metrics)