In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchdiffeq

class TextDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# Define the ODE function (derivative)
class ODEFunc(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.linear = nn.Linear(dim, dim)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, t, x):
        return self.relu(self.linear(x))

# Define the ODE block that integrates the ODEFunc
class ODEBlock(nn.Module):
    def __init__(self, odefunc):
        super().__init__()
        self.odefunc = odefunc

    def forward(self, x):
        # Integrate from t=0 to t=1
        return torchdiffeq.odeint_adjoint(self.odefunc, x, torch.tensor([0, 1], dtype=torch.float32), method='dopri5')[1]

# Define the Neural ODE model for text classification
class ODEModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.ode_block = ODEBlock(ODEFunc(input_dim))
        self.fc = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x = self.ode_block(x)
        return self.fc(x)


In [2]:
# Training loop
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    losses = []
    for data, target in tqdm(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    print(f'Train Epoch: {epoch} \tAverage Loss: {sum(losses)/len(losses):.6f}')

In [11]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import numpy as np

# Load the dataset including all categories
data = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))

# Define the desired categories and their corresponding labels
category_labels = {
    'comp.graphics': 0,
    'comp.os.ms-windows.misc': 0,
    'comp.sys.ibm.pc.hardware': 0,
    'comp.sys.mac.hardware': 0,
    'comp.windows.x': 0,
    'rec.autos': 1,
    'rec.motorcycles': 1,
    'rec.sport.baseball': 1,
    'rec.sport.hockey': 1,
    'sci.crypt': 2,
    'sci.electronics': 2,
    'sci.med': 2,
    'sci.space': 2,
    'talk.politics.guns': 3,
    'talk.politics.mideast': 3,
    'talk.politics.misc': 3,
    'talk.religion.misc': 3
}

# Filter the dataset to include only the desired categories
filtered_texts = []
filtered_labels = []
for text, label in zip(data.data, data.target):
    category = data.target_names[label]
    if category in category_labels:
        filtered_texts.append(text)
        filtered_labels.append(category_labels[category])

# Now, filtered_texts contain the texts and filtered_labels contain the corresponding labels


# Convert to TF-IDF vectors
vectorizer = TfidfVectorizer(max_features=100)  # Limiting to 100 features
X = vectorizer.fit_transform(texts).toarray()

In [4]:
import torch
from torch.utils.data import TensorDataset, DataLoader

X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# Convert to torch.Tensor
train_data = torch.tensor(X_train, dtype=torch.float32)
train_labels = torch.tensor(y_train, dtype=torch.long)
test_data = torch.tensor(X_test, dtype=torch.float32)
test_labels = torch.tensor(y_test, dtype=torch.long)

# Create TensorDatasets
train_dataset = TensorDataset(train_data, train_labels)
test_dataset = TensorDataset(test_data, test_labels)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [5]:
# Initialize the model
input_dim = X_train.shape[1]
output_dim = len(np.unique(labels))  # Number of classes
model = ODEModel(input_dim=input_dim, output_dim=output_dim)

In [6]:
from tqdm import tqdm
def train_model(model, device, train_loader, optimizer, criterion, epochs=5):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch_idx, (data, target) in tqdm(enumerate(train_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch: {epoch+1}, Average Loss: {avg_loss:.4f}')

def predict_text(model, device, vectorizer, new_text, class_names):
    model.eval()
    processed_text = vectorizer.transform([new_text]).toarray()  # Vectorize text
    processed_text_tensor = torch.tensor(processed_text, dtype=torch.float32).to(device)
    with torch.no_grad():
        output = model(processed_text_tensor)
        prediction = output.argmax(dim=1, keepdim=True)  # Get the index of the max log-probability
    predicted_class = class_names[prediction.item()]
    return predicted_class

def evaluate_model(model, device, data_loader, criterion):
    model.eval()
    total_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            total_loss += criterion(output, target).item()  # Sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # Get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    avg_loss = total_loss / len(data_loader)
    accuracy = 100. * correct / len(data_loader.dataset)
    return avg_loss, accuracy


In [10]:
# Import necessary libraries (assuming the above code blocks are already executed)
import torch.nn.functional as F

# Define criterion and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Training the model
print("Starting Training")
train_model(model, device, train_loader, optimizer, criterion, epochs=5)

# Evaluating the model on the test set
test_loss, test_accuracy = evaluate_model(model, device, test_loader, criterion)
print(f'Test Set Evaluation: Average loss: {test_loss:.4f}, Accuracy: {test_accuracy:.2f}%')

# Inference Example
new_text = "This is a new document to classify."
class_names = data.target_names  # Assuming 'data' has been loaded with fetch_20newsgroups
predicted_class = predict_text(model, device, vectorizer, new_text, class_names)
print(f"The predicted class for the new document is: {predicted_class}")


Starting Training


236it [22:08,  5.63s/it]


Epoch: 1, Average Loss: 2.6591


236it [24:04,  6.12s/it]


Epoch: 2, Average Loss: 2.5774


236it [26:41,  6.79s/it]


Epoch: 3, Average Loss: 2.5216


236it [28:37,  7.28s/it]


Epoch: 4, Average Loss: 2.4838


236it [29:37,  7.53s/it]


Epoch: 5, Average Loss: 2.4591
Test Set Evaluation: Average loss: 2.5136, Accuracy: 20.80%
The predicted class for the new document is: misc.forsale


In [None]:
def visualize_intermediate_representations(model, data_loader, device):
    model.eval()
    representations = []
    labels = []
    with torch.no_grad():
        for data, target in data_loader:
            data = data.to(device)
            # Assuming the model has an `ode_block` attribute
            representation = model.ode_block(data)  
            representations.append(representation.cpu().numpy())
            labels.append(target.cpu().numpy())
    
    # Concatenate all collected representations and labels
    representations = np.concatenate(representations, axis=0)
    labels = np.concatenate(labels, axis=0)
    
    # Apply dimensionality reduction (e.g., t-SNE)
    from sklearn.manifold import TSNE
    tsne = TSNE(n_components=2, random_state=42)
    reduced_data = tsne.fit_transform(representations)
    
    # Plot the reduced data
    import matplotlib.pyplot as plt
    plt.figure(figsize=(10, 8))
    for label in np.unique(labels):
        idx = labels == label
        plt.scatter(reduced_data[idx, 0], reduced_data[idx, 1], label=label, alpha=0.5)
    plt.legend()
    plt.title('t-SNE visualization of learned representations')
    plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Assuming `func` is your ODE function modeling f(y, t)
# and you've defined a grid of points in your 2D space
Y, X = np.mgrid[ymin:ymax:100j, xmin:xmax:100j]
U, V = np.zeros(Y.shape), np.zeros(X.shape)

for i in range(Y.shape[0]):
    for j in range(X.shape[1]):
        # Evaluate the vector field at each point
        dydt = func(None, np.array([Y[i, j], X[i, j]]))  # None for time if it's not used
        U[i, j], V[i, j] = dydt[0], dydt[1]

plt.streamplot(X, Y, U, V, color='0.8')
plt.show()
