# BASE Model 

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np


In [2]:
# Load the data from CSV
data = pd.read_csv('keystream_records.csv')

# Separate the input features (64-bit key) and the targets (10 bytes of keystream)
X = data.iloc[:, :64].values  # First 64 columns are the 64-bit key
y = data.iloc[:, 64:74].values   # Next 10 columns are the 10 bytes of the keystream

# Convert the target to categorical (byte values range from 0 to 255)
y = y.astype(np.uint8)


In [3]:
# Split the data into training and testing sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
class KeystreamClassifier(nn.Module):
    def __init__(self):
        super(KeystreamClassifier, self).__init__()
        self.fc1 = nn.Linear(64, 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 256)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [None]:
# Function to train and evaluate a model for a specific byte of the keystream
def train_and_evaluate(X_train, y_train, X_test, y_test, byte_index):
    # Initialize the model, loss function, and optimizer
    model = KeystreamClassifier()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Convert data to PyTorch tensors
    y_train_tensor = torch.tensor(y_train[:, byte_index], dtype=torch.long)
    y_test_tensor = torch.tensor(y_test[:, byte_index], dtype=torch.long)
    
    # Create data loaders
    train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32), y_train_tensor)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    
    # Training the model
    num_epochs = 50
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}')

    # Evaluate the model
    model.eval()
    with torch.no_grad():
        outputs = model(torch.tensor(X_test, dtype=torch.float32))
        _, predicted = torch.max(outputs, 1)
        accuracy = accuracy_score(y_test[:, byte_index], predicted.numpy())
        return accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    accuracy = train_and_evaluate(X_train, y_train, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index}: {accuracy * 100:.2f}%')

# Print all accuracies
print("Accuracies for all 10 bytes:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')


## Base Model with train accuracy also to chk overfit

In [7]:
# Function to train and evaluate a model for a specific byte of the keystream
def train_and_evaluate(X_train, y_train, X_test, y_test, byte_index):
    # Initialize the model, loss function, and optimizer
    model = KeystreamClassifier()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Convert data to PyTorch tensors
    y_train_tensor = torch.tensor(y_train[:, byte_index], dtype=torch.long)
    y_test_tensor = torch.tensor(y_test[:, byte_index], dtype=torch.long)
    
    # Create data loaders
    train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32), y_train_tensor)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    
    # Training the model
    num_epochs = 50
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}')

    # Evaluate the model
    model.eval()
    with torch.no_grad():
        # Evaluate on training data
        train_outputs = model(torch.tensor(X_train, dtype=torch.float32))
        _, train_predicted = torch.max(train_outputs, 1)
        train_accuracy = accuracy_score(y_train[:, byte_index], train_predicted.numpy())
        
        # Evaluate on test data
        test_outputs = model(torch.tensor(X_test, dtype=torch.float32))
        _, test_predicted = torch.max(test_outputs, 1)
        test_accuracy = accuracy_score(y_test[:, byte_index], test_predicted.numpy())
        
        return train_accuracy, test_accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    train_accuracy, test_accuracy = train_and_evaluate(X_train, y_train, X_test, y_test, byte_index)
    accuracies.append((train_accuracy, test_accuracy))
    print(f'Byte {byte_index}: Train Accuracy: {train_accuracy * 100:.2f}%, Test Accuracy: {test_accuracy * 100:.2f}%')


# Print all accuracies
print("Accuracies for all 10 bytes:")
for byte_index, (train_accuracy, test_accuracy) in enumerate(accuracies):
    print(f'Byte {byte_index}: Train Accuracy: {train_accuracy * 100:.2f}%, Test Accuracy: {test_accuracy * 100:.2f}%')


Epoch 1/50, Loss: 5.546514935302734
Epoch 2/50, Loss: 5.543440324020386
Epoch 3/50, Loss: 5.535131820678711
Epoch 4/50, Loss: 5.511759674835205
Epoch 5/50, Loss: 5.4691279647827145
Epoch 6/50, Loss: 5.41241294631958
Epoch 7/50, Loss: 5.347812014007569
Epoch 8/50, Loss: 5.28043024520874
Epoch 9/50, Loss: 5.213923597335816
Epoch 10/50, Loss: 5.151710289001465
Epoch 11/50, Loss: 5.094512948989868
Epoch 12/50, Loss: 5.042583721542359
Epoch 13/50, Loss: 4.996898053741455
Epoch 14/50, Loss: 4.9560673038482665
Epoch 15/50, Loss: 4.9207466732025145
Epoch 16/50, Loss: 4.887672731399536
Epoch 17/50, Loss: 4.85904135093689
Epoch 18/50, Loss: 4.832201749420166
Epoch 19/50, Loss: 4.807951002120972
Epoch 20/50, Loss: 4.784691147994995
Epoch 21/50, Loss: 4.765532139968872
Epoch 22/50, Loss: 4.745943176651001
Epoch 23/50, Loss: 4.728050874710083
Epoch 24/50, Loss: 4.71211417427063
Epoch 25/50, Loss: 4.697914255523681
Epoch 26/50, Loss: 4.683232507705688
Epoch 27/50, Loss: 4.668825652313233
Epoch 28/50

KeyboardInterrupt: 

# BASE MODEL with val loss,val accuracy and early stopping

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
import csv


In [2]:
# Load the data from CSV
data = pd.read_csv('keystream_records_val.csv')

# Separate the input features (64-bit key) and the targets (10 bytes of keystream)
X = data.iloc[:, :64].values  # First 64 columns are the 64-bit key
y = data.iloc[:, 64:74].values   # Next 10 columns are the 10 bytes of the keystream

# Convert the target to categorical (byte values range from 0 to 255)
y = y.astype(np.uint8)


In [3]:
# Split the data into training, validation, and testing sets (70-10-20 split)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.67, random_state=42)  # 0.67 * 0.3 ≈ 0.2


In [4]:
class KeystreamClassifier(nn.Module):
    def __init__(self):
        super(KeystreamClassifier, self).__init__()
        self.fc1 = nn.Linear(64, 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 256)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x


## LR=0.001

In [5]:
# Function to train and evaluate a model for a specific byte of the keystream
def train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index):
    # Initialize the model, loss function, and optimizer
    model = KeystreamClassifier()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Convert data to PyTorch tensors
    y_train_tensor = torch.tensor(y_train[:, byte_index], dtype=torch.long)
    y_val_tensor = torch.tensor(y_val[:, byte_index], dtype=torch.long)
    y_test_tensor = torch.tensor(y_test[:, byte_index], dtype=torch.long)
    
    # Create data loaders
    train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32), y_train_tensor)
    val_dataset = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.float32), y_val_tensor)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)
    
    # Early stopping parameters
    patience = 5
    best_val_loss = float('inf')
    best_model = None
    patience_counter = 0

    # Training the model
    num_epochs = 50
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_accuracy = correct_train / total_train

        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == labels).sum().item()
                total_val += labels.size(0)

        val_loss /= len(val_loader)
        val_accuracy = correct_val / total_val

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping")
                break

    # Load the best model
    model.load_state_dict(best_model)

    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        outputs = model(torch.tensor(X_test, dtype=torch.float32))
        _, predicted = torch.max(outputs, 1)
        test_accuracy = accuracy_score(y_test[:, byte_index], predicted.numpy())
        return test_accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    print(f'\nTraining for byte {byte_index}...')
    accuracy = train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index}: {accuracy * 100:.2f}%')

# Print all accuracies
print("\nAccuracies for all 10 bytes:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')



Training for byte 0...
Epoch 1/50, Loss: 5.5466, Accuracy: 0.0038, Val Loss: 5.5461, Val Accuracy: 0.0034
Epoch 2/50, Loss: 5.5448, Accuracy: 0.0041, Val Loss: 5.5462, Val Accuracy: 0.0033
Epoch 3/50, Loss: 5.5413, Accuracy: 0.0047, Val Loss: 5.5495, Val Accuracy: 0.0040
Epoch 4/50, Loss: 5.5289, Accuracy: 0.0059, Val Loss: 5.5568, Val Accuracy: 0.0043
Epoch 5/50, Loss: 5.5048, Accuracy: 0.0083, Val Loss: 5.5740, Val Accuracy: 0.0048
Epoch 6/50, Loss: 5.4694, Accuracy: 0.0100, Val Loss: 5.5934, Val Accuracy: 0.0048
Early stopping
Accuracy for byte 0: 0.42%

Training for byte 1...
Epoch 1/50, Loss: 5.5466, Accuracy: 0.0040, Val Loss: 5.5465, Val Accuracy: 0.0036
Epoch 2/50, Loss: 5.5447, Accuracy: 0.0041, Val Loss: 5.5469, Val Accuracy: 0.0037
Epoch 3/50, Loss: 5.5410, Accuracy: 0.0050, Val Loss: 5.5506, Val Accuracy: 0.0030
Epoch 4/50, Loss: 5.5281, Accuracy: 0.0062, Val Loss: 5.5618, Val Accuracy: 0.0035
Epoch 5/50, Loss: 5.5023, Accuracy: 0.0080, Val Loss: 5.5733, Val Accuracy: 0.00

## LR=0.01

In [6]:
# Function to train and evaluate a model for a specific byte of the keystream
def train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index):
    # Initialize the model, loss function, and optimizer
    model = KeystreamClassifier()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    
    # Convert data to PyTorch tensors
    y_train_tensor = torch.tensor(y_train[:, byte_index], dtype=torch.long)
    y_val_tensor = torch.tensor(y_val[:, byte_index], dtype=torch.long)
    y_test_tensor = torch.tensor(y_test[:, byte_index], dtype=torch.long)
    
    # Create data loaders
    train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32), y_train_tensor)
    val_dataset = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.float32), y_val_tensor)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)
    
    # Early stopping parameters
    patience = 5
    best_val_loss = float('inf')
    best_model = None
    patience_counter = 0

    # Training the model
    num_epochs = 15
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_accuracy = correct_train / total_train

        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == labels).sum().item()
                total_val += labels.size(0)

        val_loss /= len(val_loader)
        val_accuracy = correct_val / total_val

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping")
                break

    # Load the best model
    model.load_state_dict(best_model)

    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        outputs = model(torch.tensor(X_test, dtype=torch.float32))
        _, predicted = torch.max(outputs, 1)
        test_accuracy = accuracy_score(y_test[:, byte_index], predicted.numpy())
        return test_accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    print(f'\nTraining for byte {byte_index}...')
    accuracy = train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index}: {accuracy * 100:.2f}%')

# Print all accuracies
print("\nAccuracies for all 10 bytes:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')



Training for byte 0...
Epoch 1/15, Loss: 5.5503, Accuracy: 0.0040, Val Loss: 5.5500, Val Accuracy: 0.0032
Epoch 2/15, Loss: 5.5496, Accuracy: 0.0040, Val Loss: 5.5514, Val Accuracy: 0.0037
Epoch 3/15, Loss: 5.5500, Accuracy: 0.0038, Val Loss: 5.5513, Val Accuracy: 0.0042
Epoch 4/15, Loss: 5.5501, Accuracy: 0.0041, Val Loss: 5.5520, Val Accuracy: 0.0041
Epoch 5/15, Loss: 5.5501, Accuracy: 0.0040, Val Loss: 5.5482, Val Accuracy: 0.0026
Epoch 6/15, Loss: 5.5500, Accuracy: 0.0039, Val Loss: 5.5484, Val Accuracy: 0.0036
Epoch 7/15, Loss: 5.5501, Accuracy: 0.0038, Val Loss: 5.5522, Val Accuracy: 0.0026
Epoch 8/15, Loss: 5.5500, Accuracy: 0.0040, Val Loss: 5.5498, Val Accuracy: 0.0033
Epoch 9/15, Loss: 5.5499, Accuracy: 0.0040, Val Loss: 5.5514, Val Accuracy: 0.0033
Epoch 10/15, Loss: 5.5501, Accuracy: 0.0041, Val Loss: 5.5508, Val Accuracy: 0.0034
Early stopping
Accuracy for byte 0: 0.46%

Training for byte 1...
Epoch 1/15, Loss: 5.5503, Accuracy: 0.0039, Val Loss: 5.5508, Val Accuracy: 0.0

# Adding sigmoid layer

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
import csv


In [8]:
# Load the data from CSV
data = pd.read_csv('keystream_records_val.csv')

# Separate the input features (64-bit key) and the targets (10 bytes of keystream)
X = data.iloc[:, :64].values  # First 64 columns are the 64-bit key
y = data.iloc[:, 64:74].values   # Next 10 columns are the 10 bytes of the keystream

# Convert the target to categorical (byte values range from 0 to 255)
y = y.astype(np.uint8)


In [9]:
# Split the data into training, validation, and testing sets (70-10-20 split)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.67, random_state=42)  # 0.67 * 0.3 ≈ 0.2


In [10]:
class KeystreamClassifier(nn.Module):
    def __init__(self):
        super(KeystreamClassifier, self).__init__()
        self.fc1 = nn.Linear(64, 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 8)  # Output layer with 8 neurons
        self.sigmoid = nn.Sigmoid()   # Sigmoid activation

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))  # Apply sigmoid to the output layer
        return x


In [11]:
# Function to convert a byte to 8-bit binary format
def byte_to_bits(byte_array):
    return np.unpackbits(byte_array[:, np.newaxis], axis=1)

# Function to train and evaluate a model for a specific byte of the keystream
def train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index):
    # Initialize the model, loss function, and optimizer
    model = KeystreamClassifier()
    criterion = nn.BCELoss()  # Binary Cross Entropy Loss
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Convert data to PyTorch tensors
    y_train_bits = torch.tensor(byte_to_bits(y_train[:, byte_index]), dtype=torch.float32)
    y_val_bits = torch.tensor(byte_to_bits(y_val[:, byte_index]), dtype=torch.float32)
    y_test_bits = torch.tensor(byte_to_bits(y_test[:, byte_index]), dtype=torch.float32)
    
    # Create data loaders
    train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32), y_train_bits)
    val_dataset = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.float32), y_val_bits)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)
    
    # Early stopping parameters
    patience = 5
    best_val_loss = float('inf')
    best_model = None
    patience_counter = 0

    # Training the model
    num_epochs = 50
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            predicted = (outputs > 0.5).float()
            correct_train += (predicted == labels).sum().item()
            total_train += labels.numel()

        train_loss = running_loss / len(train_loader)
        train_accuracy = correct_train / total_train

        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                predicted = (outputs > 0.5).float()
                correct_val += (predicted == labels).sum().item()
                total_val += labels.numel()

        val_loss /= len(val_loader)
        val_accuracy = correct_val / total_val

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping")
                break

    # Load the best model
    model.load_state_dict(best_model)

    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        outputs = model(torch.tensor(X_test, dtype=torch.float32))
        predicted = (outputs > 0.5).float()
        correct_test = (predicted == y_test_bits).sum().item()
        total_test = y_test_bits.numel()
        test_accuracy = correct_test / total_test
        return test_accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    print(f'\nTraining for byte {byte_index}...')
    accuracy = train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index}: {accuracy * 100:.2f}%')

# Print all accuracies
print("\nAccuracies for all 10 bytes:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')



Training for byte 0...
Epoch 1/50, Loss: 0.6933, Accuracy: 0.4991, Val Loss: 0.6932, Val Accuracy: 0.4988
Epoch 2/50, Loss: 0.6931, Accuracy: 0.5036, Val Loss: 0.6932, Val Accuracy: 0.5019
Epoch 3/50, Loss: 0.6931, Accuracy: 0.5058, Val Loss: 0.6934, Val Accuracy: 0.5001
Epoch 4/50, Loss: 0.6929, Accuracy: 0.5086, Val Loss: 0.6936, Val Accuracy: 0.4976
Epoch 5/50, Loss: 0.6924, Accuracy: 0.5140, Val Loss: 0.6939, Val Accuracy: 0.4983
Epoch 6/50, Loss: 0.6917, Accuracy: 0.5197, Val Loss: 0.6947, Val Accuracy: 0.5004
Epoch 7/50, Loss: 0.6905, Accuracy: 0.5250, Val Loss: 0.6953, Val Accuracy: 0.4999
Early stopping
Accuracy for byte 0: 49.75%

Training for byte 1...
Epoch 1/50, Loss: 0.6933, Accuracy: 0.5008, Val Loss: 0.6932, Val Accuracy: 0.5013
Epoch 2/50, Loss: 0.6931, Accuracy: 0.5034, Val Loss: 0.6934, Val Accuracy: 0.4986
Epoch 3/50, Loss: 0.6930, Accuracy: 0.5068, Val Loss: 0.6937, Val Accuracy: 0.5017
Epoch 4/50, Loss: 0.6928, Accuracy: 0.5107, Val Loss: 0.6936, Val Accuracy: 0.4

# Increase the no of layers

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
import csv


In [13]:
# Load the data from CSV
data = pd.read_csv('keystream_records_val.csv')

# Separate the input features (64-bit key) and the targets (10 bytes of keystream)
X = data.iloc[:, :64].values  # First 64 columns are the 64-bit key
y = data.iloc[:, 64:74].values   # Next 10 columns are the 10 bytes of the keystream

# Convert the target to categorical (byte values range from 0 to 255)
y = y.astype(np.uint8)


In [14]:
# Split the data into training, validation, and testing sets (70-10-20 split)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.67, random_state=42)  # 0.67 * 0.3 ≈ 0.2


In [15]:
class KeystreamClassifier(nn.Module):
    def __init__(self):
        super(KeystreamClassifier, self).__init__()
        self.fc1 = nn.Linear(64, 512)
        self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 256)
        self.fc5 = nn.Linear(256, 128)
        self.fc6 = nn.Linear(128, 256)  # Last layer with 256 neurons for classification

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))
        x = torch.relu(self.fc5(x))
        x = self.fc6(x)  # No activation in the last layer
        return x


In [16]:
# Function to train and evaluate a model for a specific byte of the keystream
def train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index):
    # Initialize the model, loss function, and optimizer
    model = KeystreamClassifier()
    criterion = nn.CrossEntropyLoss()  # Cross-Entropy Loss for classification
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Convert data to PyTorch tensors
    y_train_tensor = torch.tensor(y_train[:, byte_index], dtype=torch.long)
    y_val_tensor = torch.tensor(y_val[:, byte_index], dtype=torch.long)
    y_test_tensor = torch.tensor(y_test[:, byte_index], dtype=torch.long)
    
    # Create data loaders
    train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32), y_train_tensor)
    val_dataset = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.float32), y_val_tensor)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)
    
    # Early stopping parameters
    patience = 5
    best_val_loss = float('inf')
    best_model = None
    patience_counter = 0

    # Training the model
    num_epochs = 50
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_accuracy = correct_train / total_train

        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == labels).sum().item()
                total_val += labels.size(0)

        val_loss /= len(val_loader)
        val_accuracy = correct_val / total_val

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping")
                break

    # Load the best model
    model.load_state_dict(best_model)

    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        outputs = model(torch.tensor(X_test, dtype=torch.float32))
        _, predicted = torch.max(outputs, 1)
        test_accuracy = accuracy_score(y_test[:, byte_index], predicted.numpy())
        return test_accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    print(f'\nTraining for byte {byte_index}...')
    accuracy = train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index}: {accuracy * 100:.2f}%')

# Print all accuracies
print("\nAccuracies for all 10 bytes:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')



Training for byte 0...
Epoch 1/50, Loss: 5.5462, Accuracy: 0.0038, Val Loss: 5.5459, Val Accuracy: 0.0044
Epoch 2/50, Loss: 5.5452, Accuracy: 0.0040, Val Loss: 5.5461, Val Accuracy: 0.0034
Epoch 3/50, Loss: 5.5449, Accuracy: 0.0040, Val Loss: 5.5463, Val Accuracy: 0.0038
Epoch 4/50, Loss: 5.5449, Accuracy: 0.0041, Val Loss: 5.5464, Val Accuracy: 0.0038
Epoch 5/50, Loss: 5.5449, Accuracy: 0.0040, Val Loss: 5.5466, Val Accuracy: 0.0036
Epoch 6/50, Loss: 5.5449, Accuracy: 0.0043, Val Loss: 5.5466, Val Accuracy: 0.0034
Early stopping
Accuracy for byte 0: 0.46%

Training for byte 1...
Epoch 1/50, Loss: 5.5463, Accuracy: 0.0041, Val Loss: 5.5464, Val Accuracy: 0.0046
Epoch 2/50, Loss: 5.5452, Accuracy: 0.0042, Val Loss: 5.5467, Val Accuracy: 0.0036
Epoch 3/50, Loss: 5.5449, Accuracy: 0.0040, Val Loss: 5.5468, Val Accuracy: 0.0032
Epoch 4/50, Loss: 5.5448, Accuracy: 0.0042, Val Loss: 5.5469, Val Accuracy: 0.0032
Epoch 5/50, Loss: 5.5447, Accuracy: 0.0042, Val Loss: 5.5470, Val Accuracy: 0.00

# Schduler 

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
import csv


In [4]:
# Load the data from CSV
data = pd.read_csv('keystream_records_10lakh.csv')

# Separate the input features (64-bit key) and the targets (10 bytes of keystream)
X = data.iloc[:, :64].values  # First 64 columns are the 64-bit key
y = data.iloc[:, 64:74].values   # Next 10 columns are the 10 bytes of the keystream

# Convert the target to categorical (byte values range from 0 to 255)
y = y.astype(np.uint8)


In [5]:
# Split the data into training, validation, and testing sets (70-10-20 split)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.67, random_state=42)  # 0.67 * 0.3 ≈ 0.2


In [6]:
class ImprovedKeystreamClassifier(nn.Module):
    def __init__(self):
        super(ImprovedKeystreamClassifier, self).__init__()
        self.fc1 = nn.Linear(64, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.fc3 = nn.Linear(512, 256)
        self.bn3 = nn.BatchNorm1d(256)
        self.fc4 = nn.Linear(256, 256)
        self.bn4 = nn.BatchNorm1d(256)
        self.fc5 = nn.Linear(256, 128)
        self.bn5 = nn.BatchNorm1d(128)
        self.fc6 = nn.Linear(128, 256)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn4(self.fc4(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn5(self.fc5(x)))
        x = self.fc6(x)  # No activation in the last layer
        return x


# L1

In [21]:
# Function to train and evaluate a model for a specific byte of the keystream
def train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index):
    # Initialize the model, loss function, and optimizer
    model = ImprovedKeystreamClassifier()
    criterion = nn.CrossEntropyLoss()  # Cross-Entropy Loss for classification
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5)

    # Convert data to PyTorch tensors
    y_train_tensor = torch.tensor(y_train[:, byte_index], dtype=torch.long)
    y_val_tensor = torch.tensor(y_val[:, byte_index], dtype=torch.long)
    y_test_tensor = torch.tensor(y_test[:, byte_index], dtype=torch.long)

    # Create data loaders
    train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32), y_train_tensor)
    val_dataset = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.float32), y_val_tensor)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)

    # Early stopping parameters
    patience = 5
    best_val_loss = float('inf')
    best_model = None
    patience_counter = 0

    # Training the model
    num_epochs = 50
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_accuracy = correct_train / total_train

        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == labels).sum().item()
                total_val += labels.size(0)

        val_loss /= len(val_loader)
        val_accuracy = correct_val / total_val

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        # Adjust learning rate based on validation loss
        scheduler.step(val_loss)

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping")
                break

    # Load the best model
    model.load_state_dict(best_model)

    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        outputs = model(torch.tensor(X_test, dtype=torch.float32))
        _, predicted = torch.max(outputs, 1)
        test_accuracy = accuracy_score(y_test[:, byte_index], predicted.numpy())
        return test_accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    print(f'\nTraining for byte {byte_index}...')
    accuracy = train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index}: {accuracy * 100:.2f}%')

# Print all accuracies
print("\nAccuracies for all 10 bytes:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')



Training for byte 0...
Epoch 1/50, Loss: 5.5617, Accuracy: 0.0038, Val Loss: 5.5499, Val Accuracy: 0.0030
Epoch 2/50, Loss: 5.5501, Accuracy: 0.0040, Val Loss: 5.5497, Val Accuracy: 0.0034
Epoch 3/50, Loss: 5.5505, Accuracy: 0.0037, Val Loss: 5.5493, Val Accuracy: 0.0033
Epoch 4/50, Loss: 5.5498, Accuracy: 0.0040, Val Loss: 5.5498, Val Accuracy: 0.0032
Epoch 5/50, Loss: 5.5500, Accuracy: 0.0043, Val Loss: 5.5491, Val Accuracy: 0.0039
Epoch 6/50, Loss: 5.5502, Accuracy: 0.0037, Val Loss: 5.5499, Val Accuracy: 0.0042
Epoch 7/50, Loss: 5.5478, Accuracy: 0.0040, Val Loss: 5.5478, Val Accuracy: 0.0039
Epoch 8/50, Loss: 5.5474, Accuracy: 0.0035, Val Loss: 5.5478, Val Accuracy: 0.0040
Epoch 9/50, Loss: 5.5474, Accuracy: 0.0040, Val Loss: 5.5480, Val Accuracy: 0.0038
Epoch 10/50, Loss: 5.5473, Accuracy: 0.0038, Val Loss: 5.5479, Val Accuracy: 0.0037
Epoch 11/50, Loss: 5.5462, Accuracy: 0.0040, Val Loss: 5.5471, Val Accuracy: 0.0034
Epoch 12/50, Loss: 5.5459, Accuracy: 0.0044, Val Loss: 5.5468

In [22]:
# Function to train and evaluate a model for a specific byte of the keystream
def train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index):
    # Initialize the model, loss function, and optimizer
    model = ImprovedKeystreamClassifier()
    criterion = nn.CrossEntropyLoss()  # Cross-Entropy Loss for classification
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5)

    # Convert data to PyTorch tensors
    y_train_tensor = torch.tensor(y_train[:, byte_index], dtype=torch.long)
    y_val_tensor = torch.tensor(y_val[:, byte_index], dtype=torch.long)
    y_test_tensor = torch.tensor(y_test[:, byte_index], dtype=torch.long)

    # Create data loaders
    train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32), y_train_tensor)
    val_dataset = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.float32), y_val_tensor)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)

    # Early stopping parameters
    patience = 5
    best_val_loss = float('inf')
    best_model = None
    patience_counter = 0

    # Training the model
    num_epochs = 50
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_accuracy = correct_train / total_train

        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == labels).sum().item()
                total_val += labels.size(0)

        val_loss /= len(val_loader)
        val_accuracy = correct_val / total_val

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        # Adjust learning rate based on validation loss
        scheduler.step(val_loss)

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping")
                break

    # Load the best model
    model.load_state_dict(best_model)

    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        outputs = model(torch.tensor(X_test, dtype=torch.float32))
        _, predicted = torch.max(outputs, 1)
        test_accuracy = accuracy_score(y_test[:, byte_index], predicted.numpy())
        return test_accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    print(f'\nTraining for byte {byte_index}...')
    accuracy = train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index}: {accuracy * 100:.2f}%')

# Print all accuracies
print("\nAccuracies for all 10 bytes:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')



Training for byte 0...
Epoch 1/50, Loss: 5.5863, Accuracy: 0.0039, Val Loss: 5.5551, Val Accuracy: 0.0034
Epoch 2/50, Loss: 5.5530, Accuracy: 0.0040, Val Loss: 5.5472, Val Accuracy: 0.0033
Epoch 3/50, Loss: 5.5457, Accuracy: 0.0037, Val Loss: 5.5466, Val Accuracy: 0.0042
Epoch 4/50, Loss: 5.5452, Accuracy: 0.0046, Val Loss: 5.5468, Val Accuracy: 0.0028
Epoch 5/50, Loss: 5.5450, Accuracy: 0.0044, Val Loss: 5.5470, Val Accuracy: 0.0036
Epoch 6/50, Loss: 5.5447, Accuracy: 0.0042, Val Loss: 5.5468, Val Accuracy: 0.0036
Epoch 7/50, Loss: 5.5434, Accuracy: 0.0048, Val Loss: 5.5474, Val Accuracy: 0.0029
Epoch 8/50, Loss: 5.5424, Accuracy: 0.0050, Val Loss: 5.5476, Val Accuracy: 0.0038
Early stopping
Accuracy for byte 0: 0.40%

Training for byte 1...
Epoch 1/50, Loss: 5.5863, Accuracy: 0.0043, Val Loss: 5.5555, Val Accuracy: 0.0038
Epoch 2/50, Loss: 5.5508, Accuracy: 0.0043, Val Loss: 5.5464, Val Accuracy: 0.0032
Epoch 3/50, Loss: 5.5456, Accuracy: 0.0041, Val Loss: 5.5464, Val Accuracy: 0.00

# L2 regularization

In [23]:
# Function to train and evaluate a model for a specific byte of the keystream
def train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index):
    # Initialize the model, loss function, and optimizer
    model = ImprovedKeystreamClassifier()
    criterion = nn.CrossEntropyLoss()  # Cross-Entropy Loss for classification
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5)

    # Convert data to PyTorch tensors
    y_train_tensor = torch.tensor(y_train[:, byte_index], dtype=torch.long)
    y_val_tensor = torch.tensor(y_val[:, byte_index], dtype=torch.long)
    y_test_tensor = torch.tensor(y_test[:, byte_index], dtype=torch.long)

    # Create data loaders
    train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32), y_train_tensor)
    val_dataset = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.float32), y_val_tensor)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)

    # Early stopping parameters
    patience = 5
    best_val_loss = float('inf')
    best_model = None
    patience_counter = 0

    # Training the model
    num_epochs = 50
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_accuracy = correct_train / total_train

        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == labels).sum().item()
                total_val += labels.size(0)

        val_loss /= len(val_loader)
        val_accuracy = correct_val / total_val

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        # Adjust learning rate based on validation loss
        scheduler.step(val_loss)

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping")
                break

    # Load the best model
    model.load_state_dict(best_model)

    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        outputs = model(torch.tensor(X_test, dtype=torch.float32))
        _, predicted = torch.max(outputs, 1)
        test_accuracy = accuracy_score(y_test[:, byte_index], predicted.numpy())
        return test_accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    print(f'\nTraining for byte {byte_index}...')
    accuracy = train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index}: {accuracy * 100:.2f}%')

# Print all accuracies
print("\nAccuracies for all 10 bytes:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')



Training for byte 0...
Epoch 1/50, Loss: 5.5866, Accuracy: 0.0041, Val Loss: 5.5546, Val Accuracy: 0.0047
Epoch 2/50, Loss: 5.5509, Accuracy: 0.0038, Val Loss: 5.5461, Val Accuracy: 0.0039
Epoch 3/50, Loss: 5.5460, Accuracy: 0.0042, Val Loss: 5.5459, Val Accuracy: 0.0038
Epoch 4/50, Loss: 5.5451, Accuracy: 0.0041, Val Loss: 5.5462, Val Accuracy: 0.0036
Epoch 5/50, Loss: 5.5452, Accuracy: 0.0042, Val Loss: 5.5479, Val Accuracy: 0.0033
Epoch 6/50, Loss: 5.5441, Accuracy: 0.0044, Val Loss: 5.5467, Val Accuracy: 0.0034
Epoch 7/50, Loss: 5.5434, Accuracy: 0.0043, Val Loss: 5.5474, Val Accuracy: 0.0035
Epoch 8/50, Loss: 5.5432, Accuracy: 0.0049, Val Loss: 5.5476, Val Accuracy: 0.0035
Early stopping
Accuracy for byte 0: 0.38%

Training for byte 1...
Epoch 1/50, Loss: 5.5861, Accuracy: 0.0038, Val Loss: 5.5535, Val Accuracy: 0.0043
Epoch 2/50, Loss: 5.5541, Accuracy: 0.0041, Val Loss: 5.5463, Val Accuracy: 0.0041
Epoch 3/50, Loss: 5.5459, Accuracy: 0.0043, Val Loss: 5.5470, Val Accuracy: 0.00

# 15lakh samples L2 re (shared to sir) One may use

In [7]:
# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


In [8]:
# Function to train and evaluate a model for a specific byte of the keystream
def train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index):
    # Initialize the model, loss function, and optimizer
    model = ImprovedKeystreamClassifier().to(device)
    criterion = nn.CrossEntropyLoss()  # Cross-Entropy Loss for classification
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5)

    # Convert data to PyTorch tensors and move to GPU if available
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train[:, byte_index], dtype=torch.long).to(device)
    X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
    y_val_tensor = torch.tensor(y_val[:, byte_index], dtype=torch.long).to(device)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    y_test_tensor = torch.tensor(y_test[:, byte_index], dtype=torch.long).to(device)

    # Create data loaders
    train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = torch.utils.data.TensorDataset(X_val_tensor, y_val_tensor)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)

    # Early stopping parameters
    patience = 5
    best_val_loss = float('inf')
    best_model = None
    patience_counter = 0

    # Training the model
    num_epochs = 50
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_accuracy = correct_train / total_train

        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == labels).sum().item()
                total_val += labels.size(0)

        val_loss /= len(val_loader)
        val_accuracy = correct_val / total_val

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        # Adjust learning rate based on validation loss
        scheduler.step(val_loss)

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping")
                break

    # Load the best model
    model.load_state_dict(best_model)

    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        outputs = model(X_test_tensor)
        _, predicted = torch.max(outputs, 1)
        test_accuracy = accuracy_score(y_test_tensor.cpu(), predicted.cpu())
        return test_accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    print(f'\nTraining for byte {byte_index}...')
    accuracy = train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index}: {accuracy * 100:.2f}%')

# Print all accuracies
print("\nAccuracies for all 10 bytes:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')



Training for byte 0...
Epoch 1/50, Loss: 5.5509, Accuracy: 0.0039, Val Loss: 5.5458, Val Accuracy: 0.0039
Epoch 2/50, Loss: 5.5458, Accuracy: 0.0038, Val Loss: 5.5457, Val Accuracy: 0.0038
Epoch 3/50, Loss: 5.5457, Accuracy: 0.0038, Val Loss: 5.5457, Val Accuracy: 0.0041
Epoch 4/50, Loss: 5.5457, Accuracy: 0.0038, Val Loss: 5.5456, Val Accuracy: 0.0037
Epoch 5/50, Loss: 5.5454, Accuracy: 0.0039, Val Loss: 5.5455, Val Accuracy: 0.0038
Epoch 6/50, Loss: 5.5454, Accuracy: 0.0040, Val Loss: 5.5456, Val Accuracy: 0.0038
Epoch 7/50, Loss: 5.5454, Accuracy: 0.0039, Val Loss: 5.5455, Val Accuracy: 0.0039
Epoch 8/50, Loss: 5.5453, Accuracy: 0.0039, Val Loss: 5.5454, Val Accuracy: 0.0037
Epoch 9/50, Loss: 5.5453, Accuracy: 0.0039, Val Loss: 5.5454, Val Accuracy: 0.0041
Epoch 10/50, Loss: 5.5453, Accuracy: 0.0039, Val Loss: 5.5454, Val Accuracy: 0.0041
Epoch 11/50, Loss: 5.5452, Accuracy: 0.0039, Val Loss: 5.5454, Val Accuracy: 0.0037
Epoch 12/50, Loss: 5.5452, Accuracy: 0.0039, Val Loss: 5.5454

# Other models

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the data from CSV
data = pd.read_csv('keystream_records_val.csv')

# Separate the input features (64-bit key) and the targets (10 bytes of keystream)
X = data.iloc[:, :64].values  # First 64 columns are the 64-bit key
y = data.iloc[:, 64:74].values   # Next 10 columns are the 10 bytes of the keystream

# Convert the target to categorical (byte values range from 0 to 255)
y = y.astype(np.uint8)

# Split the data into training, validation, and testing sets (70-10-20 split)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.67, random_state=42)  # 0.67 * 0.3 ≈ 0.2

In [None]:
# Function to train and evaluate a Random Forest for a specific byte of the keystream
def train_and_evaluate_rf(X_train, y_train, X_test, y_test, byte_index):
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train[:, byte_index])
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test[:, byte_index], y_pred)
    return accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    accuracy = train_and_evaluate_rf(X_train, y_train, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index} with Random Forest: {accuracy * 100:.2f}%')

# Print all accuracies
print("\nAccuracies for all 10 bytes with Random Forest:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Function to train and evaluate a Gradient Boosting Machine for a specific byte of the keystream
def train_and_evaluate_gbm(X_train, y_train, X_test, y_test, byte_index):
    model = GradientBoostingClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train[:, byte_index])
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test[:, byte_index], y_pred)
    return accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    accuracy = train_and_evaluate_gbm(X_train, y_train, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index} with GBM: {accuracy * 100:.2f}%')

# Print all accuracies
print("\nAccuracies for all 10 bytes with GBM:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')


In [None]:
from sklearn.svm import SVC

# Function to train and evaluate an SVM for a specific byte of the keystream
def train_and_evaluate_svm(X_train, y_train, X_test, y_test, byte_index):
    model = SVC(kernel='linear', random_state=42)
    model.fit(X_train, y_train[:, byte_index])
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test[:, byte_index], y_pred)
    return accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    accuracy = train_and_evaluate_svm(X_train, y_train, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index} with SVM: {accuracy * 100:.2f}%')

# Print all accuracies
print("\nAccuracies for all 10 bytes with SVM:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')


Accuracy for byte 0 with SVM: 0.42%


In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Function to train and evaluate a k-NN for a specific byte of the keystream
def train_and_evaluate_knn(X_train, y_train, X_test, y_test, byte_index):
    model = KNeighborsClassifier(n_neighbors=5)
    model.fit(X_train, y_train[:, byte_index])
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test[:, byte_index], y_pred)
    return accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    accuracy = train_and_evaluate_knn(X_train, y_train, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index} with k-NN: {accuracy * 100:.2f}%')

# Print all accuracies
print("\nAccuracies for all 10 bytes with k-NN:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')


In [None]:
from sklearn.linear_model import LogisticRegression

# Function to train and evaluate a Logistic Regression model for a specific byte of the keystream
def train_and_evaluate_logreg(X_train, y_train, X_test, y_test, byte_index):
    model = LogisticRegression(max_iter=1000, random_state=42)
    model.fit(X_train, y_train[:, byte_index])
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test[:, byte_index], y_pred)
    return accuracy

# Train and evaluate the model for each byte and print the accuracy
accuracies = []
for byte_index in range(10):
    accuracy = train_and_evaluate_logreg(X_train, y_train, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index} with Logistic Regression: {accuracy * 100:.2f}%')

# Print all accuracies
print("\nAccuracies for all 10 bytes with Logistic Regression:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')


## Trans

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np


In [2]:
# Load the data from CSV
data = pd.read_csv('keystream_records_val.csv')

# Separate the input features (64-bit key) and the targets (10 bytes of keystream)
X = data.iloc[:, :64].values  # First 64 columns are the 64-bit key
y = data.iloc[:, 64:74].values   # Next 10 columns are the 10 bytes of the keystream

# Convert the target to categorical (byte values range from 0 to 255)
y = y.astype(np.uint8)

# Split the data into training, validation, and testing sets (70-10-20 split)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.67, random_state=42)  # 0.67 * 0.3 ≈ 0.2


In [3]:
class SimpleTransformer(nn.Module):
    def __init__(self, input_dim, model_dim, num_heads, num_layers, output_dim):
        super(SimpleTransformer, self).__init__()
        self.embedding = nn.Linear(input_dim, model_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(model_dim, output_dim)
    
    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)  # Global average pooling
        x = self.fc(x)
        return x

input_dim = 64
model_dim = 128
num_heads = 4
num_layers = 2
output_dim = 256  # For classification into 256 classes (byte values)

model = SimpleTransformer(input_dim, model_dim, num_heads, num_layers, output_dim)




In [4]:
# Function to train and evaluate the Transformer model for a specific byte of the keystream
def train_and_evaluate_transformer(X_train, y_train, X_test, y_test, byte_index):
    model = SimpleTransformer(input_dim, model_dim, num_heads, num_layers, output_dim)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5)
    
    y_train_tensor = torch.tensor(y_train[:, byte_index], dtype=torch.long)
    y_test_tensor = torch.tensor(y_test[:, byte_index], dtype=torch.long)
    
    train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32), y_train_tensor)
    val_dataset = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val[:, byte_index], dtype=torch.long))
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)
    
    patience = 5
    best_val_loss = float('inf')
    best_model = None
    patience_counter = 0

    num_epochs = 50
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs.unsqueeze(1))  # Add a dummy dimension for sequence
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_accuracy = correct_train / total_train

        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs.unsqueeze(1))  # Add a dummy dimension for sequence
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == labels).sum().item()
                total_val += labels.size(0)

        val_loss /= len(val_loader)
        val_accuracy = correct_val / total_val

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping")
                break

    model.load_state_dict(best_model)

    model.eval()
    with torch.no_grad():
        outputs = model(torch.tensor(X_test, dtype=torch.float32).unsqueeze(1))  # Add a dummy dimension for sequence
        _, predicted = torch.max(outputs, 1)
        test_accuracy = accuracy_score(y_test[:, byte_index], predicted.numpy())
        return test_accuracy

accuracies = []
for byte_index in range(10):
    print(f'\nTraining for byte {byte_index}...')
    accuracy = train_and_evaluate_transformer(X_train, y_train, X_test, y_test, byte_index)
    accuracies.append(accuracy)
    print(f'Accuracy for byte {byte_index} with Transformer: {accuracy * 100:.2f}%')

print("\nAccuracies for all 10 bytes with Transformer:")
for byte_index, accuracy in enumerate(accuracies):
    print(f'Byte {byte_index}: {accuracy * 100:.2f}%')



Training for byte 0...
Epoch 1/50, Loss: 5.5772, Accuracy: 0.0039, Val Loss: 5.5585, Val Accuracy: 0.0035
Epoch 2/50, Loss: 5.5524, Accuracy: 0.0041, Val Loss: 5.5490, Val Accuracy: 0.0055
Epoch 3/50, Loss: 5.5486, Accuracy: 0.0040, Val Loss: 5.5504, Val Accuracy: 0.0033
Epoch 4/50, Loss: 5.5473, Accuracy: 0.0039, Val Loss: 5.5475, Val Accuracy: 0.0032
Epoch 5/50, Loss: 5.5471, Accuracy: 0.0039, Val Loss: 5.5475, Val Accuracy: 0.0036
Epoch 6/50, Loss: 5.5466, Accuracy: 0.0042, Val Loss: 5.5475, Val Accuracy: 0.0038
Epoch 7/50, Loss: 5.5464, Accuracy: 0.0040, Val Loss: 5.5476, Val Accuracy: 0.0034
Epoch 8/50, Loss: 5.5455, Accuracy: 0.0041, Val Loss: 5.5466, Val Accuracy: 0.0036
Epoch 9/50, Loss: 5.5452, Accuracy: 0.0042, Val Loss: 5.5470, Val Accuracy: 0.0036
Epoch 10/50, Loss: 5.5453, Accuracy: 0.0040, Val Loss: 5.5471, Val Accuracy: 0.0040
Epoch 11/50, Loss: 5.5452, Accuracy: 0.0040, Val Loss: 5.5468, Val Accuracy: 0.0034
Epoch 12/50, Loss: 5.5449, Accuracy: 0.0040, Val Loss: 5.5466



Epoch 1/50, Loss: 5.5780, Accuracy: 0.0039, Val Loss: 5.5545, Val Accuracy: 0.0036
Epoch 2/50, Loss: 5.5522, Accuracy: 0.0039, Val Loss: 5.5486, Val Accuracy: 0.0041
Epoch 3/50, Loss: 5.5485, Accuracy: 0.0041, Val Loss: 5.5480, Val Accuracy: 0.0045
Epoch 4/50, Loss: 5.5473, Accuracy: 0.0041, Val Loss: 5.5473, Val Accuracy: 0.0039
Epoch 5/50, Loss: 5.5468, Accuracy: 0.0043, Val Loss: 5.5475, Val Accuracy: 0.0038
Epoch 6/50, Loss: 5.5464, Accuracy: 0.0044, Val Loss: 5.5480, Val Accuracy: 0.0040
Epoch 7/50, Loss: 5.5462, Accuracy: 0.0041, Val Loss: 5.5471, Val Accuracy: 0.0040
Epoch 8/50, Loss: 5.5453, Accuracy: 0.0045, Val Loss: 5.5473, Val Accuracy: 0.0040
Epoch 9/50, Loss: 5.5452, Accuracy: 0.0043, Val Loss: 5.5473, Val Accuracy: 0.0039
Epoch 10/50, Loss: 5.5450, Accuracy: 0.0044, Val Loss: 5.5472, Val Accuracy: 0.0040
Epoch 11/50, Loss: 5.5447, Accuracy: 0.0045, Val Loss: 5.5474, Val Accuracy: 0.0040
Epoch 12/50, Loss: 5.5447, Accuracy: 0.0044, Val Loss: 5.5470, Val Accuracy: 0.0032
E



Epoch 1/50, Loss: 5.5800, Accuracy: 0.0036, Val Loss: 5.5582, Val Accuracy: 0.0032
Epoch 2/50, Loss: 5.5525, Accuracy: 0.0041, Val Loss: 5.5494, Val Accuracy: 0.0040
Epoch 3/50, Loss: 5.5487, Accuracy: 0.0038, Val Loss: 5.5474, Val Accuracy: 0.0039
Epoch 4/50, Loss: 5.5475, Accuracy: 0.0040, Val Loss: 5.5463, Val Accuracy: 0.0036
Epoch 5/50, Loss: 5.5469, Accuracy: 0.0042, Val Loss: 5.5471, Val Accuracy: 0.0047
Epoch 6/50, Loss: 5.5464, Accuracy: 0.0039, Val Loss: 5.5477, Val Accuracy: 0.0048
Epoch 7/50, Loss: 5.5465, Accuracy: 0.0042, Val Loss: 5.5468, Val Accuracy: 0.0040
Epoch 8/50, Loss: 5.5453, Accuracy: 0.0042, Val Loss: 5.5465, Val Accuracy: 0.0038
Epoch 9/50, Loss: 5.5453, Accuracy: 0.0045, Val Loss: 5.5464, Val Accuracy: 0.0042
Early stopping
Accuracy for byte 2 with Transformer: 0.36%

Training for byte 3...




Epoch 1/50, Loss: 5.5794, Accuracy: 0.0039, Val Loss: 5.5564, Val Accuracy: 0.0037
Epoch 2/50, Loss: 5.5522, Accuracy: 0.0039, Val Loss: 5.5516, Val Accuracy: 0.0039
Epoch 3/50, Loss: 5.5482, Accuracy: 0.0042, Val Loss: 5.5494, Val Accuracy: 0.0029
Epoch 4/50, Loss: 5.5473, Accuracy: 0.0040, Val Loss: 5.5472, Val Accuracy: 0.0031
Epoch 5/50, Loss: 5.5466, Accuracy: 0.0039, Val Loss: 5.5492, Val Accuracy: 0.0030
Epoch 6/50, Loss: 5.5463, Accuracy: 0.0043, Val Loss: 5.5480, Val Accuracy: 0.0042
Epoch 7/50, Loss: 5.5461, Accuracy: 0.0041, Val Loss: 5.5488, Val Accuracy: 0.0028
Epoch 8/50, Loss: 5.5452, Accuracy: 0.0043, Val Loss: 5.5469, Val Accuracy: 0.0040
Epoch 9/50, Loss: 5.5450, Accuracy: 0.0046, Val Loss: 5.5471, Val Accuracy: 0.0034
Epoch 10/50, Loss: 5.5450, Accuracy: 0.0042, Val Loss: 5.5469, Val Accuracy: 0.0040
Epoch 11/50, Loss: 5.5446, Accuracy: 0.0039, Val Loss: 5.5470, Val Accuracy: 0.0040
Epoch 12/50, Loss: 5.5445, Accuracy: 0.0043, Val Loss: 5.5476, Val Accuracy: 0.0031
E



Epoch 1/50, Loss: 5.5774, Accuracy: 0.0041, Val Loss: 5.5549, Val Accuracy: 0.0042
Epoch 2/50, Loss: 5.5517, Accuracy: 0.0040, Val Loss: 5.5485, Val Accuracy: 0.0034
Epoch 3/50, Loss: 5.5485, Accuracy: 0.0042, Val Loss: 5.5482, Val Accuracy: 0.0042
Epoch 4/50, Loss: 5.5476, Accuracy: 0.0043, Val Loss: 5.5480, Val Accuracy: 0.0040
Epoch 5/50, Loss: 5.5471, Accuracy: 0.0040, Val Loss: 5.5470, Val Accuracy: 0.0040
Epoch 6/50, Loss: 5.5467, Accuracy: 0.0038, Val Loss: 5.5466, Val Accuracy: 0.0041
Epoch 7/50, Loss: 5.5461, Accuracy: 0.0039, Val Loss: 5.5477, Val Accuracy: 0.0042
Epoch 8/50, Loss: 5.5460, Accuracy: 0.0042, Val Loss: 5.5468, Val Accuracy: 0.0033
Epoch 9/50, Loss: 5.5453, Accuracy: 0.0041, Val Loss: 5.5461, Val Accuracy: 0.0037
Epoch 10/50, Loss: 5.5452, Accuracy: 0.0044, Val Loss: 5.5469, Val Accuracy: 0.0033


KeyboardInterrupt: 