In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder

# Load the combined dataset from disk; the rest of the script works on this frame.
file_path = 'combined_dataset_filtered_100_partitions.csv'
data = pd.read_csv(file_path)

# Swap the raw 'target' labels for integer class ids. The fitted encoder is
# kept around because its `classes_` are needed to size the classifier output.
label_encoder = LabelEncoder()
encoded_target = label_encoder.fit_transform(data['target'])
data['target'] = encoded_target

class TimeSeriesDataset(Dataset):
    """Sliding-window dataset over a tabular time series.

    Sample ``i`` is the window of rows ``i .. i + sequence_length - 1``
    (feature columns only), paired with the ``'target'`` value of the row
    immediately after that window.

    Args:
        data: DataFrame whose ``'target'`` column holds integer class ids;
            every other column is treated as a numeric feature.
        sequence_length: Number of consecutive rows in each window.

    Note:
        Features and targets are converted to tensors once, at construction
        time, so later changes to ``data`` are not reflected in the samples.
    """

    def __init__(self, data, sequence_length):
        self.data = data
        self.sequence_length = sequence_length
        # Select features by name rather than by position, so the label can
        # never leak into the inputs when 'target' is not the last column.
        self.features = data.columns.drop('target')
        # Convert once here instead of slicing the DataFrame on every item.
        self._feature_tensor = torch.tensor(
            data[self.features].to_numpy(), dtype=torch.float32
        )
        self._target_tensor = torch.tensor(
            data['target'].to_numpy(), dtype=torch.long
        )

    def __len__(self):
        # A frame shorter than one window yields no samples; clamp so len()
        # does not raise on a negative value.
        return max(0, len(self._target_tensor) - self.sequence_length)

    def __getitem__(self, idx):
        """Return ``(window, label)`` for the window starting at row ``idx``.

        ``window`` is a float32 tensor of shape
        ``(sequence_length, num_features)``; ``label`` is a 0-dim int64 tensor.
        Both are views into the tensors built in ``__init__``.
        """
        window_end = idx + self.sequence_length
        x = self._feature_tensor[idx:window_end]
        y = self._target_tensor[window_end]
        return x, y

# Window length: each sample spans `sequence_length` consecutive rows and is
# labelled with the target of the row that follows the window.
sequence_length = 30
dataset = TimeSeriesDataset(data=data, sequence_length=sequence_length)
# Serve the windows in shuffled mini-batches of 32.
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=32)

class TransformerModel(nn.Module):
    """Transformer-encoder classifier over fixed-length feature windows.

    The input window is run through a stack of ``nn.TransformerEncoderLayer``
    blocks whose model dimension equals the number of input features. The
    encoded sequence is then flattened and mapped to class logits by a single
    linear layer.

    Args:
        input_size: Number of features per time step (used as ``d_model``).
            ``nn.MultiheadAttention`` requires this to be divisible by
            ``num_heads``.
        num_layers: Number of stacked encoder layers.
        num_heads: Number of attention heads per layer.
        hidden_dim: Width of each layer's feed-forward sub-network.
        num_classes: Number of output classes (logits).
        seq_len: Number of time steps per window; it sizes the final linear
            layer. When omitted, the module-level ``sequence_length`` is used
            so existing five-argument calls keep working.
    """

    def __init__(self, input_size, num_layers, num_heads, hidden_dim, num_classes, seq_len=None):
        super().__init__()
        if seq_len is None:
            # Backward-compatible fallback for callers that rely on the
            # script-level window length instead of passing it explicitly.
            seq_len = sequence_length
        self.seq_len = seq_len
        # NOTE: nn.TransformerEncoder works on its own copies of this layer.
        # The attribute is kept so the module's parameter and state_dict
        # layout stays exactly as before.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=input_size, nhead=num_heads, dim_feedforward=hidden_dim)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(input_size * seq_len, num_classes)

    def forward(self, x):
        """Map a ``(batch, seq_len, input_size)`` batch to ``(batch, num_classes)`` logits."""
        batch_size = x.size(0)
        # The encoder was built with the default sequence-first layout.
        x = x.permute(1, 0, 2)
        x = self.transformer_encoder(x)
        # Back to batch-first, then flatten time and feature axes for the head.
        x = x.permute(1, 0, 2).reshape(batch_size, -1)
        return self.fc(x)

# Model hyper-parameters. Every column except one (the target) is a feature.
num_layers, num_heads, hidden_dim = 2, 4, 128
input_size = len(data.columns) - 1
num_classes = len(label_encoder.classes_)

model = TransformerModel(
    input_size=input_size,
    num_layers=num_layers,
    num_heads=num_heads,
    hidden_dim=hidden_dim,
    num_classes=num_classes,
)

# Multi-class classification: cross-entropy loss optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
 
# Train for a fixed number of epochs, reporting the mean per-sample loss and
# the training accuracy after each pass over the data.
num_epochs = 10
for epoch in range(num_epochs):
    # Keep dropout in the encoder active even if the model was switched to
    # eval mode before this cell was (re-)run.
    model.train()

    total_loss = 0.0
    correct = 0
    total = 0

    for x_batch, y_batch in dataloader:
        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        n_samples = y_batch.size(0)
        # The criterion returns the batch mean. Weight it by the batch size so
        # a smaller final batch does not skew the epoch average.
        total_loss += loss.item() * n_samples
        # Metrics only: take predictions from detached logits.
        predicted = outputs.detach().argmax(dim=1)
        correct += (predicted == y_batch).sum().item()
        total += n_samples

    accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/total:.4f}, Accuracy: {accuracy:.4f}')

print("Training complete.")




Epoch [1/10], Loss: 1.1638, Accuracy: 0.6088
Epoch [2/10], Loss: 0.5650, Accuracy: 0.7947
Epoch [3/10], Loss: 0.6094, Accuracy: 0.8140
Epoch [4/10], Loss: 0.4489, Accuracy: 0.8509
Epoch [5/10], Loss: 0.5104, Accuracy: 0.8509
Epoch [6/10], Loss: 0.3464, Accuracy: 0.8684
Epoch [7/10], Loss: 0.3345, Accuracy: 0.8649
Epoch [8/10], Loss: 0.3474, Accuracy: 0.8649
Epoch [9/10], Loss: 0.3277, Accuracy: 0.8789
Epoch [10/10], Loss: 0.3254, Accuracy: 0.8842
Training complete.
