In [9]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.nn import TransformerEncoder, TransformerEncoderLayer

In [10]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * -(np.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

class SequenceTransformer(nn.Module):
    def __init__(self, input_dim, nhead, nhid, nlayers, output_dim):
        super(SequenceTransformer, self).__init__()
        self.model_type = 'Transformer'
        self.pos_encoder = PositionalEncoding(input_dim)
        encoder_layers = TransformerEncoderLayer(input_dim, nhead, nhid)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Linear(input_dim, input_dim)
        self.decoder = nn.Linear(input_dim, output_dim)

    def forward(self, src):
        src = self.encoder(src)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        output = self.decoder(output[:, -1, :])
        return output

In [11]:
# Load data
data = pd.read_csv('df_full.csv')

# Data preprocessing
data.drop(['Unnamed: 0', 'student_id'], axis=1, inplace=True)
data.fillna(0, inplace=True)  # Assuming filling NaNs with zero is appropriate
data = pd.get_dummies(data, columns=['book'], drop_first=True)
data['status'] = pd.Categorical(data['status'])
data['status'] = data['status'].cat.codes

# Split features and target
features = data.drop('status', axis=1).astype(np.float32)
targets = data['status'].astype(np.int64)

# Normalize features
scaler = StandardScaler()
features = scaler.fit_transform(features)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.int64)  # Ensure it's converted to a NumPy array first
y_test = torch.tensor(y_test.values, dtype=torch.int64)

# Create dataloaders
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
test_loader = DataLoader(test_data, batch_size=16, shuffle=False)

# Model parameters
input_dim = X_train.shape[1]  # Features count
nhead = 4  # Number of heads in the multiheadattention models
nhid = 128  # The dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 3  # The number of nn.TransformerEncoderLayer in nn.TransformerEncoder
output_dim = len(pd.unique(data['status']))  # Number of unique output classes

model = SequenceTransformer(input_dim, nhead, nhid, nlayers, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}')

# Testing the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print(f'Accuracy: {100 * correct / total:.2f}%')

AssertionError: embed_dim must be divisible by num_heads