# In [None]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm
import matplotlib.pyplot as plt
import time
import psutil
import os
import deepspeed

# Constants

# -- Network shape (consumed by LSTMModel) --
INPUT_DIM = 768    # feature size of each LSTM input step (presumably the GPT-2 embedding width; confirm against the HDF5 data)
HIDDEN_DIM = 256   # LSTM hidden-state size, also the input width of the final linear layer
NUM_LAYERS = 2     # number of stacked LSTM layers
DROPOUT = 0.5      # dropout value handed to nn.LSTM
OUTPUT_DIM = 33    # number of logits produced per sample (presumably the number of distinct 'source' labels; confirm)

# -- Optimisation schedule --
BATCH_SIZE = 64        # samples per DataLoader batch
LEARNING_RATE = 1e-3   # learning rate handed to the Adam optimizer
NUM_EPOCHS = 50        # number of passes over the training loader

# Check for GPU availability and setup
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

# Load data: one DataFrame per split, read in train / test / val order.
train_data, test_data, val_data = (
    pd.read_hdf('train_embeddings.h5'),
    pd.read_hdf('test_embeddings.h5'),
    pd.read_hdf('val_embeddings.h5'),
)

# Encode labels: the encoder is fitted on the training split only, then the same
# mapping is applied to the other splits. transform() will raise if a split
# contains a label that never occurs in the training data.
label_encoder = LabelEncoder()
train_data['source'] = label_encoder.fit_transform(train_data['source'])
for _other_split in (test_data, val_data):
    _other_split['source'] = label_encoder.transform(_other_split['source'])

# Dataset class
class EmbeddingDataset(Dataset):
    """Map-style dataset that pairs one embedding with one label per index."""

    def __init__(self, embeddings, labels):
        """Copy ``embeddings`` and ``labels`` into NumPy arrays.

        Args:
            embeddings: Sequence of per-sample embeddings.
            labels: Sequence of per-sample labels, aligned with ``embeddings``.
        """
        # Materialise both inputs up front; later lookups index these arrays by position.
        self.embeddings, self.labels = np.array(embeddings), np.array(labels)

    def __len__(self):
        """Return the number of samples, taken from the label array."""
        sample_count = len(self.labels)
        return sample_count

    def __getitem__(self, idx):
        """Return ``(embedding, label)`` for position ``idx``.

        The embedding is converted to a float32 tensor on every access; the
        label is returned exactly as stored in the NumPy label array.
        """
        features = torch.tensor(self.embeddings[idx], dtype=torch.float32)
        target = self.labels[idx]
        return features, target

# Data loaders
# Wrap each split's embedding and label columns in a dataset (train, test, val order).
train_dataset, test_dataset, val_dataset = (
    EmbeddingDataset(split_frame['gpt2_embeddings'], split_frame['source'])
    for split_frame in (train_data, test_data, val_data)
)

# Only the training loader shuffles; the evaluation loaders keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader, val_loader = (
    DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False)
    for eval_dataset in (test_dataset, val_dataset)
)

# Model definition
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last timestep.

    Every constructor argument is optional; an argument left as ``None`` falls
    back to the matching module-level constant, so ``LSTMModel()`` builds the
    same network as before.

    Args:
        input_dim: Feature size of each input step (default ``INPUT_DIM``).
        hidden_dim: LSTM hidden-state size (default ``HIDDEN_DIM``).
        output_dim: Number of output logits (default ``OUTPUT_DIM``).
        num_layers: Number of stacked LSTM layers (default ``NUM_LAYERS``).
        dropout: Dropout between stacked LSTM layers (default ``DROPOUT``).
    """

    def __init__(self, input_dim=None, hidden_dim=None, output_dim=None,
                 num_layers=None, dropout=None):
        super().__init__()
        # Resolve defaults lazily so explicit arguments never touch the globals.
        input_dim = INPUT_DIM if input_dim is None else input_dim
        hidden_dim = HIDDEN_DIM if hidden_dim is None else hidden_dim
        output_dim = OUTPUT_DIM if output_dim is None else output_dim
        num_layers = NUM_LAYERS if num_layers is None else num_layers
        dropout = DROPOUT if dropout is None else dropout

        # nn.LSTM applies dropout only between stacked layers; with a single
        # layer it has no effect and PyTorch warns, so pass 0 in that case.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers,
            dropout=inter_layer_dropout,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return logits of shape ``(batch, output_dim)``.

        Args:
            x: Either ``(batch, seq_len, input_dim)`` or ``(batch, input_dim)``.
               A 2-D batch holds one embedding per sample and is treated as a
               sequence of length 1. Without this, nn.LSTM would read it as a
               single unbatched sequence and the last-timestep indexing below
               would fail on the resulting 2-D output.
        """
        if x.dim() == 2:
            x = x.unsqueeze(1)
        lstm_out, _ = self.lstm(x)
        return self.fc(lstm_out[:, -1, :])  # Use last timestep

model = LSTMModel()
loss_fn = nn.CrossEntropyLoss()

# Setup DeepSpeed
# Batch sizing: only the per-GPU micro batch and the accumulation steps are
# pinned. DeepSpeed derives train_batch_size as
#   micro_batch * gradient_accumulation_steps * world_size,
# so the config stays valid for any number of processes. The previous fixed
# 64 / 32 / 1 combination was only consistent with exactly two processes.
# The micro batch equals BATCH_SIZE because every DataLoader batch is fed to
# the engine as a single step.
deepspeed_config = {
    "gradient_accumulation_steps": 1,
    "train_micro_batch_size_per_gpu": BATCH_SIZE,
    "optimizer": {
        "type": "Adam",
        "params": {
            "lr": LEARNING_RATE,
        },
    },
    "scheduler": {
        "type": "WarmupLR",
        "params": {
            "warmup_min_lr": 0,
            "warmup_max_lr": LEARNING_RATE,
            "warmup_num_steps": 100,
        },
    },
    "fp16": {
        "enabled": True,
    },
    "zero_optimization": {
        "stage": 2,
        "offload_optimizer": {
            "device": "cpu",
            "pin_memory": True,
        },
    },
}

# The optimizer is defined once, in the config above. Passing a separate
# torch.optim.Adam as well was redundant, and recent DeepSpeed releases reject a
# client-side optimizer combined with ZeRO optimizer offload unless that is
# explicitly allowed in the config. DeepSpeed builds the optimizer itself from
# `model_parameters`.
model_engine, optimizer, _, _ = deepspeed.initialize(
    model=model,
    model_parameters=model.parameters(),
    config_params=deepspeed_config,
)

# Helper function to calculate metrics
def compute_metrics(preds, labels):
    """Compute accuracy and macro-averaged precision, recall and F1.

    Args:
        preds: Array of per-class scores; the predicted class is the argmax
            over the last axis.
        labels: Array of true class indices aligned with ``preds``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``.
    """
    # Reduce scores to class indices once instead of once per metric.
    predicted = preds.argmax(-1)

    accuracy = accuracy_score(labels, predicted)
    # zero_division=0 keeps the score scikit-learn already assigns to classes
    # with no predicted or no true samples, but without emitting an
    # UndefinedMetricWarning on every call.
    precision = precision_score(labels, predicted, average='macro', zero_division=0)
    recall = recall_score(labels, predicted, average='macro', zero_division=0)
    f1 = f1_score(labels, predicted, average='macro', zero_division=0)
    return accuracy, precision, recall, f1

# Training and evaluation function
def train_and_evaluate(model, train_loader, val_loader, epochs):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        model: DeepSpeed engine wrapping the network. It must provide
            ``local_rank``, ``backward`` and ``step`` in addition to the usual
            ``nn.Module`` interface.
        train_loader: Iterable of ``(data, labels)`` training batches.
        val_loader: Iterable of ``(data, labels)`` validation batches.
        epochs: Number of passes over ``train_loader``.

    Prints the mean training loss and the validation accuracy, macro
    precision, macro recall and macro F1 after every epoch. Returns ``None``.
    """
    target_device = model.local_rank
    # Inputs must match the dtype of the engine's weights. With fp16 enabled the
    # wrapped parameters are half precision while the dataset yields float32.
    param_dtype = next(model.parameters()).dtype

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for data, labels in tqdm(train_loader):
            data = data.to(target_device).to(param_dtype)
            labels = labels.to(target_device)
            # Drive the engine passed in as `model` rather than the module-level
            # `model_engine`, so the function trains whatever it is given.
            model.zero_grad()
            outputs = model(data)
            loss = loss_fn(outputs, labels)
            model.backward(loss)
            model.step()
            total_loss += loss.item()
        # max(..., 1) avoids a ZeroDivisionError when the loader yields no batches.
        mean_loss = total_loss / max(len(train_loader), 1)
        print(f"Epoch {epoch+1}, Loss: {mean_loss}")

        model.eval()
        val_preds, val_labels = [], []
        with torch.no_grad():
            for data, labels in val_loader:
                data = data.to(target_device).to(param_dtype)
                outputs = model(data)
                # Move results to the CPU per batch so the full validation set
                # is never held in GPU memory at once.
                val_preds.append(outputs.cpu())
                val_labels.append(labels.cpu())

        if not val_preds:
            # torch.cat raises on an empty list; nothing to score in that case.
            print(f"Validation - Epoch {epoch+1}: no validation batches, skipping metrics")
            continue

        preds = torch.cat(val_preds)
        labels = torch.cat(val_labels)
        accuracy, precision, recall, f1 = compute_metrics(preds.numpy(), labels.numpy())
        print(f"Validation - Epoch {epoch+1}: Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1 Score: {f1}")

# Run training and evaluation
# Keyword arguments make the mapping onto train_and_evaluate's parameters explicit.
train_and_evaluate(
    model=model_engine,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=NUM_EPOCHS,
)
