# Import and Preparation

In [1]:
import os
import json
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import classification_report, accuracy_score


In [2]:
# Custom Dataset class
class TraceDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.long)

# Basic RNN Model

In [3]:
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.rnn.num_layers, x.size(0), self.rnn.hidden_size).to(x.device)
        out, _ = self.rnn(x, h0)
        out = out[:, -1, :]  # Take the last output
        out = self.fc(out)
        return out

# Train the Model

In [4]:
def train_model(model, dataloader, criterion, optimizer, device):
    model.train()
    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()


# Evaluation and Visualization

In [5]:
def evaluate_model(model, dataloader, device):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    accuracy = accuracy_score(all_labels, all_preds)
    report = classification_report(all_labels, all_preds, zero_division=0)
    return accuracy, report

# Execution

In [6]:
# Load dataset
def get_logs(logs_dir, mutation_index):
    files = os.listdir(logs_dir)
    logs = []
    for file_name in files:
        if file_name.startswith(f"mutation{mutation_index}_"):
            with open(os.path.join(logs_dir, file_name), "r") as f:
                logs.append(json.load(f))
    print(f"Number of logs: {len(logs)}")
    return logs

# Combine logs
def combine_logs(logs):
    combined_logs = [log for log in logs if isinstance(log, dict)]
    df = pd.DataFrame(combined_logs)
    return df

In [7]:
def main():
    base_dir = "../fuzz_test"
    logs_subdirs_to_train = {
        "textdistance/test_DamerauLevenshtein/logs": [1, 2, 3, 4, 5],
        "dateutil/test_date_parse/logs": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    }

    for logs_subdir, mutation_indices in logs_subdirs_to_train.items():
        for mutation_index in mutation_indices:
            logs = get_logs(os.path.join(base_dir, logs_subdir), mutation_index)
            if not logs:
                continue

            df = combine_logs(logs)
            df = df.dropna(subset=["exec_trace", "verdict"])



            y = df["verdict"].apply(lambda x: 1 if x.lower() == "pass" else 0).values
            X = df["exec_trace"].apply(lambda x: " ".join(x) if isinstance(x, list) else "").values

            # Vectorize traces
            tfidf_vectorizer = TfidfVectorizer(max_features=500)
            X_tfidf = tfidf_vectorizer.fit_transform(X).toarray()

            # Convert to sequences
            X_tfidf = np.expand_dims(X_tfidf, axis=1)
            
            # Train/Test split
            X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)
            
            # Datasets and Dataloaders
            train_dataset = TraceDataset(X_train, y_train)
            test_dataset = TraceDataset(X_test, y_test)
            train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
            test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


            # RNN Parameters
            input_size = X_train.shape[2]
            hidden_size = 128
            num_layers = 2
            num_classes = 2

            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model = RNNModel(input_size, hidden_size, num_layers, num_classes).to(device)

            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

            # Training loop
            num_epochs = 10
            for epoch in range(num_epochs):
                train_model(model, train_loader, criterion, optimizer, device)
                # print(f"Epoch {epoch + 1}/{num_epochs} completed.")

            # Evaluation
            accuracy, report = evaluate_model(model, test_loader, device)
            # Include file name and parameters in the printed report
            print(f"---------- Evaluation Report ----------")
            print(f"Logs Subdirectory: {logs_subdir}")
            print(f"Mutation Index: {mutation_index}")
            print(f"TF-IDF Vectorization Parameters: max_features=500")
            print(f"RNN Model Parameters: hidden_size=128, num_layers=2, num_classes=2")
            print(f"Test Accuracy: {accuracy:.4f}")
            print("Classification Report:")
            print(report)
            print(f"---------------------------------------")


if __name__ == "__main__":
    main()

Number of logs: 10000
---------- Evaluation Report ----------
Logs Subdirectory: textdistance/test_DamerauLevenshtein/logs
Mutation Index: 1
TF-IDF Vectorization Parameters: max_features=500
RNN Model Parameters: hidden_size=128, num_layers=2, num_classes=2
Test Accuracy: 0.9820
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        36
           1       0.98      1.00      0.99      1964

    accuracy                           0.98      2000
   macro avg       0.49      0.50      0.50      2000
weighted avg       0.96      0.98      0.97      2000

---------------------------------------
Number of logs: 10000
---------- Evaluation Report ----------
Logs Subdirectory: textdistance/test_DamerauLevenshtein/logs
Mutation Index: 2
TF-IDF Vectorization Parameters: max_features=500
RNN Model Parameters: hidden_size=128, num_layers=2, num_classes=2
Test Accuracy: 0.9170
Classification Report:
              precision    