# extract_data


In [1]:
import shutil
import os

# Path of the folder that the previous run created
destination_folder = "/content/vctk_full"

# Remove the folder when it is present; otherwise only report that there is nothing to delete
folder_is_missing = not os.path.exists(destination_folder)
if folder_is_missing:
    print(f"ℹ התיקייה '{destination_folder}' לא קיימת, אין מה למחוק.")
else:
    shutil.rmtree(destination_folder)
    print(f" התיקייה '{destination_folder}' נמחקה בהצלחה.")


ℹ התיקייה '/content/vctk_full' לא קיימת, אין מה למחוק.


In [2]:
import zipfile
import os
import shutil
from google.colab import drive

# Make Google Drive reachable from the Colab runtime
drive.mount('/content/drive')

# Optional sanity check: list the two Drive folders the paths below rely on
root_path = '/content/drive/My Drive/'
subfolder_path = '/content/drive/My Drive/Colab Notebooks/'
print("Contents of 'My Drive':", os.listdir(root_path))
print("Contents of 'Colab Notebooks':", os.listdir(subfolder_path))

# Inputs and outputs of the extraction step
zip_file = "/content/drive/My Drive/Colab Notebooks/archive.zip"  # archive stored on Drive
destination_folder = "/content/vctk_samples"  # where the selected data is extracted
wanted_speakers = ["p225", "p226", "p227", "p228"]  # speakers to keep

# Stop right away when the archive is not where it is expected
if not os.path.isfile(zip_file):
    raise FileNotFoundError(f" ZIP file not found: {zip_file}")
print(" ZIP file found:", zip_file)

# Make sure the extraction target exists
os.makedirs(destination_folder, exist_ok=True)

# Selectively extract only the desired speaker folders from the ZIP.
# An entry is kept when its archive path contains one of these fragments, so the
# filter works regardless of how deeply the corpus root is nested in the archive.
wanted_fragments = [
    f"VCTK-Corpus/{kind}/{spk}/"
    for spk in wanted_speakers
    for kind in ("wav48", "txt")
]

extracted_files = 0
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
    for entry in zip_ref.infolist():
        # Directory entries (names ending in "/") also contain the fragments, but they
        # cannot be opened for writing as files; the folders are created below anyway.
        if entry.is_dir():
            continue
        if not any(fragment in entry.filename for fragment in wanted_fragments):
            continue
        # Preserve the archive's directory structure under destination_folder
        target_path = os.path.join(destination_folder, entry.filename)
        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        with zip_ref.open(entry) as source, open(target_path, 'wb') as target:
            shutil.copyfileobj(source, target)
        extracted_files += 1

print(f"\n Extraction complete: {extracted_files} files were extracted.")
print(f" Extracted data is available in: {destination_folder}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Contents of 'My Drive': ['Colab Notebooks']
Contents of 'Colab Notebooks': ['archive.zip', 'Untitled', 'FinalProjectCS.ipynb']
 ZIP file found: /content/drive/My Drive/Colab Notebooks/archive.zip

 Extraction complete: 2684 files were extracted.
 Extracted data is available in: /content/vctk_samples


# data_preprocessing

# generate_fake_data

In [3]:
pip install numpy==1.24.3 pandas==2.2.2 networkx==2.8.8 TTS==0.15.2



In [4]:
# Install the espeak-ng system package non-interactively (-y).
# NOTE(review): presumably required by the TTS model loaded further down; confirm.
!apt-get install -y espeak-ng

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
espeak-ng is already the newest version (1.50+dfsg-10ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.


In [3]:
import os
from TTS.api import TTS
import shutil

fake_audio_folder = "/content/vctk_samples/VCTK-Corpus/VCTK-Corpus/fake_wav48"


def clear_fake_audio_folder(fake_audio_folder):
    """Empty the folder of generated audio, if it exists.

    The folder is removed together with everything inside it and then recreated
    empty. When the path does not exist nothing is done and nothing is printed.
    """
    if not os.path.exists(fake_audio_folder):
        return
    shutil.rmtree(fake_audio_folder)  # removes the folder and every file in it
    os.makedirs(fake_audio_folder, exist_ok=True)  # bring the folder back, now empty
    print(f" כל הקבצים בתיקייה '{fake_audio_folder}' נמחקו!")

# Empty the generated-audio folder configured above.
clear_fake_audio_folder(fake_audio_folder)
# NOTE(review): quit() asks the interpreter/kernel to exit, so a "Run All" presumably
# stops or restarts here. Confirm this is a deliberate runtime restart and not a leftover.
quit()

 כל הקבצים בתיקייה '/content/vctk_samples/VCTK-Corpus/VCTK-Corpus/fake_wav48' נמחקו!


In [3]:
import os
from TTS.api import TTS
import shutil

# Define input and output directories
text_folder = "/content/vctk_samples/VCTK-Corpus/VCTK-Corpus/txt"  # Source text files
fake_audio_folder = "/content/vctk_samples/VCTK-Corpus/VCTK-Corpus/fake_wav48"  # Destination for generated speech
os.makedirs(fake_audio_folder, exist_ok=True)

# Load the TTS model (pre-trained model)
tts = TTS(model_name="tts_models/en/vctk/vits", progress_bar=True)

# Speaker characteristics keyed by the numeric part of the speaker ID (e.g. "225").
# Each value is a (gender, age, accent) tuple.
speaker_info = {
    "225": ("F", "22", "Southern England"),
    "226": ("M", "22", "Surrey"),
    "227": ("M", "38", "Cumbria"),
    "228": ("F", "22", "Southern England"),
}
default_speaker_info = ("M", "30", "Neutral")  # used when a speaker is not listed above

# Walk the per-speaker sub-folders in a stable (sorted) order so reruns process
# files in the same sequence.
for subdir in sorted(os.listdir(text_folder)):
    subdir_path = os.path.join(text_folder, subdir)

    # Ensure it's a directory
    if not os.path.isdir(subdir_path):
        continue

    # Create a corresponding subfolder in fake_audio_folder
    fake_subdir = os.path.join(fake_audio_folder, subdir)
    os.makedirs(fake_subdir, exist_ok=True)

    # Process each text file in the subfolder
    for filename in sorted(os.listdir(subdir_path)):
        if not filename.endswith(".txt"):  # only transcripts are synthesized
            continue
        text_file_path = os.path.join(subdir_path, filename)

        # Read text content
        with open(text_file_path, "r", encoding="utf-8") as file:
            text = file.read().strip()
        if not text:
            # Nothing to synthesize for an empty transcript
            continue

        # The part of the filename before the first "_" is the speaker ID and is passed
        # to the model unchanged. The metadata table is keyed by the digits only, so a
        # leading "p" (as in the speaker folder names) is dropped for the lookup; a bare
        # numeric ID such as "225" still matches.
        speaker_id = filename.split('_')[0]
        gender, age, accent = speaker_info.get(speaker_id.lstrip("p"), default_speaker_info)

        # Here you can modify the model parameters based on gender, age, or accent
        # For example, you can pass a specific model for each speaker or change the prosody.

        # Output path: same base name with the extension swapped to .wav
        output_path = os.path.join(fake_subdir, os.path.splitext(filename)[0] + ".wav")

        # Generate speech and save to file
        tts.tts_to_file(text=text, speaker=speaker_id, file_path=output_path)

        # You can log the gender, age, and accent for verification
        # print(f"Generated: {output_path} (Speaker ID: {speaker_id}, Gender: {gender}, Age: {age}, Accent: {accent})")

# print(f"Fake audio saved in {fake_audio_folder}")

 > Processing time: 5.001868724822998
 > Real-time factor: 1.3460652873260484
 > Text splitted to sentences.
['We believe the world has too many refugees already.']
 > Processing time: 5.026058673858643
 > Real-time factor: 1.4524087041122755
 > Text splitted to sentences.
['Six spoons of fresh snow peas, five thick slabs of blue cheese, and maybe a snack for her brother Bob.']
 > Processing time: 10.948988676071167
 > Real-time factor: 1.4507331044332832
 > Text splitted to sentences.
['No other vehicle was involved in the crash.']
 > Processing time: 3.8437747955322266
 > Real-time factor: 1.2829261661644102
 > Text splitted to sentences.
["I didn't feel under any pressure."]
 > Processing time: 2.2081100940704346
 > Real-time factor: 0.9366117954419261
 > Text splitted to sentences.
['Both sides have been hit by injury.']
 > Processing time: 2.415067672729492
 > Real-time factor: 0.9452613281682282
 > Text splitted to sentences.
['Neither side can win this war.']
 > Processing time:

In [9]:
from google.colab import files

import os
import shutil

# Path of the generated-audio folder to download to the local machine
file_path = "/content/vctk_samples/VCTK-Corpus/VCTK-Corpus/fake_wav48"

# files.download() sends a single file, so a directory is packed into a ZIP first.
# The archive is written next to the folder (fake_wav48.zip), not inside it.
download_target = file_path
if os.path.isdir(file_path):
    download_target = shutil.make_archive(file_path, "zip", root_dir=file_path)

# Download the file
files.download(download_target)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Rotem

In [None]:
import numpy as np
import librosa

def compute_lfcc(waveform, sr,
                 n_fft=512,
                 n_filter=80,
                 n_coeff=80,
                 hop_length=160,
                 win_length=400):
    """Compute cepstral coefficients from the lowest linear-frequency STFT bins.

    Steps: Hann-windowed STFT -> power spectrum -> first ``n_filter`` frequency
    bins (zero-padded when the spectrum has fewer rows) -> log10 -> type-2
    orthonormal DCT via ``librosa.feature.mfcc``.

    Args:
        waveform: audio samples passed to ``librosa.stft``.
        sr: sample rate, forwarded to ``librosa.feature.mfcc``.
        n_fft, hop_length, win_length: STFT parameters.
        n_filter: number of low-frequency bins kept.
        n_coeff: number of coefficients requested from the DCT.

    Returns:
        The array produced by ``librosa.feature.mfcc`` (coefficients x frames).
    """
    # Power spectrogram
    spectrum = librosa.stft(waveform,
                            n_fft=n_fft,
                            hop_length=hop_length,
                            win_length=win_length,
                            window='hann')
    power = np.abs(spectrum) ** 2

    # Guarantee at least n_filter rows, then keep exactly the first n_filter
    missing_rows = n_filter - power.shape[0]
    if missing_rows > 0:
        power = np.pad(power, ((0, missing_rows), (0, 0)), mode='constant')
    log_power = np.log10(power[:n_filter, :] + 1e-8)  # small offset avoids log(0)

    # DCT over the frequency axis turns the log spectrum into cepstral coefficients
    return librosa.feature.mfcc(sr=sr,
                                S=log_power,
                                n_mfcc=n_coeff,
                                dct_type=2,
                                norm='ortho')

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
import torchaudio
import librosa

class VCTKDeepfakeDataset(Dataset):
    """Real/fake utterance dataset built from per-speaker folders of .wav files.

    Every real file yields two items: index ``i`` is the real clip (label 0) and
    index ``i + n`` is the same clip degraded by a down/up-sampling round trip
    (label 1). Items are ``(features, label)`` where ``features`` is the
    per-coefficient normalized output of ``compute_lfcc`` with a leading channel
    dimension and ``label`` is a ``torch.long`` scalar.

    Args:
        data_dir: folder containing one sub-folder per speaker.
        speakers: speaker folder names to use; ``None`` selects every sub-folder
            of ``data_dir`` whose name starts with "p".
        mode: "train", "val" or "test"; selects a contiguous slice of the
            sorted file list.
        train_ratio, val_ratio: fractions of the files given to train and val;
            the remainder is the test split.
        segment_length: clip length in seconds (clips are cut or zero-padded).
        sr: sample rate the audio is loaded at.

    Raises:
        RuntimeError: no .wav files were found, or the chosen split is empty.
        ValueError: ``mode`` is not one of train/val/test.
    """

    # Intermediate sample rate of the down/up-sampling used to create "fake" clips
    _FAKE_SR = 4000

    def __init__(self,
                 data_dir,
                 speakers=None,
                 mode="train",
                 train_ratio=0.7,
                 val_ratio=0.15,
                 segment_length=4.0,
                 sr=16000):
        self.sr = sr
        self.segment_length = segment_length

        # Resamplers are created on first use and then reused for every fake item
        self._downsample = None
        self._upsample = None

        # data_dir is expected to hold the speaker sub-folders directly
        wav_root = data_dir

        # choose which speakers to use
        if speakers is None:
            # Only directories qualify: a stray file whose name starts with "p"
            # would otherwise make os.listdir() below fail.
            speakers = [d for d in os.listdir(wav_root)
                        if d.startswith("p")
                        and os.path.isdir(os.path.join(wav_root, d))]
        all_files = []
        for spk in speakers:
            spk_dir = os.path.join(wav_root, spk)
            for fn in os.listdir(spk_dir):
                if fn.endswith(".wav"):
                    all_files.append(os.path.join(spk_dir, fn))
        all_files.sort()  # deterministic order, so the three splits never overlap
        if not all_files:
            raise RuntimeError(f"No .wav files found in {wav_root} for {speakers!r}")

        # split train / val / test
        n = len(all_files)
        n_train = int(n * train_ratio)
        n_val   = int(n * val_ratio)
        if mode == "train":
            self.real_files = all_files[:n_train]
        elif mode == "val":
            self.real_files = all_files[n_train:n_train+n_val]
        elif mode == "test":
            self.real_files = all_files[n_train+n_val:]
        else:
            raise ValueError("mode must be train/val/test")
        if not self.real_files:
            raise RuntimeError(f"After split, {mode!r} has 0 files (total was {n})")

        # each real file is mirrored by exactly one fake item
        self.n = len(self.real_files)

    def __len__(self):
        """Number of items: one real and one fake per file."""
        return self.n * 2

    def _degrade(self, wav):
        """Return ``wav`` after a round trip through the low intermediate sample rate."""
        if self._downsample is None:
            self._downsample = torchaudio.transforms.Resample(self.sr, self._FAKE_SR)
            self._upsample = torchaudio.transforms.Resample(self._FAKE_SR, self.sr)
        t = torch.from_numpy(wav).float()
        return self._upsample(self._downsample(t)).numpy()

    def __getitem__(self, idx):
        """Load one clip and return ``(features, label)``."""
        # first half of the index range is real, second half is fake
        if idx < self.n:
            path, label = self.real_files[idx], 0
        else:
            path, label = self.real_files[idx - self.n], 1

        # load + fix length
        wav, _ = librosa.load(path, sr=self.sr)
        tgt = int(self.segment_length * self.sr)
        if len(wav) > tgt:
            wav = wav[:tgt]
        else:
            wav = np.pad(wav, (0, tgt-len(wav)), mode="constant")

        # fake generation: simple down/up-sample artifact
        if label == 1:
            wav = self._degrade(wav)

        # features -> LFCC
        lfcc = compute_lfcc(wav, sr=self.sr)
        # per-coefficient normalization over time; the epsilon avoids division by zero
        m, s = lfcc.mean(axis=1, keepdims=True), lfcc.std(axis=1, keepdims=True)+1e-9
        lfcc = (lfcc - m)/s

        # add a leading channel dimension
        feat = torch.from_numpy(lfcc).float().unsqueeze(0)
        return feat, torch.tensor(label, dtype=torch.long)

In [None]:
from torch.utils.data import DataLoader

# Root folder holding one sub-folder per speaker (p225/, p226/, p227/, p228/)
data_dir = "/content/vctk_samples"
speakers = ["p225","p226","p227","p228"]

# One dataset per split, all built with the same settings
train_ds, val_ds, test_ds = [
    VCTKDeepfakeDataset(data_dir, speakers, mode=split_name)
    for split_name in ("train", "val", "test")
]

print(len(train_ds), len(val_ds), len(test_ds))

# Only the training loader shuffles and drops its final partial batch
train_loader = DataLoader(train_ds, shuffle=True, drop_last=True, batch_size=16)
val_loader   = DataLoader(val_ds,   shuffle=False, batch_size=16)
test_loader  = DataLoader(test_ds,  shuffle=False, batch_size=16)

1864 398 402


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FMSAttention(nn.Module):
    """Feature Map Scaling (FMS) attention.

    Each channel of the input is multiplied by a gate in (0, 1) computed from the
    channel-wise global average of that same input.
    """

    def __init__(self, channels):
        super().__init__()
        # Collapses every (F, T) map to a single value per channel
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Maps the pooled channel vector to one pre-sigmoid gate per channel
        self.fc = nn.Linear(channels, channels)
        nn.init.xavier_uniform_(self.fc.weight, gain=1.0)
        nn.init.constant_(self.fc.bias, 0.0)

    def forward(self, x):
        """Scale ``x`` of shape (B, C, F, T) channel-wise; the shape is unchanged."""
        batch, chans, _, _ = x.shape
        pooled = self.avg_pool(x).reshape(batch, chans)   # (B, C)
        gate = torch.sigmoid(self.fc(pooled))             # (B, C)
        # Trailing singleton axes let the gate broadcast over frequency and time
        return x * gate[:, :, None, None]

class ResidualBlock(nn.Module):
    """Pre-activation residual block built from two 3x3 convolutions.

    Main path: (BN -> LeakyReLU, skipped when ``first_block``) -> conv1 -> BN ->
    LeakyReLU -> conv2. The shortcut is the input itself, passed through a 1x1
    convolution when the channel counts differ. Spatial size is preserved.
    """

    def __init__(self, in_channels, out_channels, first_block=False):
        super().__init__()
        self.first_block = first_block
        # 3x3 kernels with padding=1 keep the (F, T) size unchanged
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(in_channels)  # applied only when this is not the first block
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # A 1x1 projection aligns the shortcut when the channel count changes
        needs_projection = in_channels != out_channels
        self.skip_conv = (
            nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
            if needs_projection else None
        )
        # Kaiming initialization matched to the LeakyReLU slope used in forward()
        convs = [self.conv1, self.conv2]
        if needs_projection:
            convs.append(self.skip_conv)
        for conv in convs:
            nn.init.kaiming_normal_(conv.weight, a=0.3, nonlinearity='leaky_relu')

    def forward(self, x):
        """Map (B, in_channels, F, T) to (B, out_channels, F, T)."""
        # Shortcut branch
        shortcut = x if self.skip_conv is None else self.skip_conv(x)

        # Main branch; the first block feeds the input to conv1 without BN/activation
        if self.first_block:
            hidden = x
        else:
            hidden = F.leaky_relu(self.bn1(x), negative_slope=0.3, inplace=True)
        hidden = self.conv1(hidden)
        hidden = F.leaky_relu(self.bn2(hidden), negative_slope=0.3, inplace=True)
        hidden = self.conv2(hidden)

        # Element-wise sum of the two branches
        return hidden + shortcut

class SpecRNet(nn.Module):
    """Convolutional-recurrent binary classifier over (B, C, F, T) feature maps.

    Pipeline: BatchNorm + SELU -> three residual blocks, each followed by
    max-pool -> FMS attention -> max-pool -> BatchNorm + SELU -> 2-layer
    bidirectional GRU -> two fully connected layers -> sigmoid.

    Args:
        input_channels: number of channels of the input feature map.
        input_freq_bins: number of frequency bins the model is meant for. It is
            stored for reference only; the pooling stack is fixed and must reduce
            the frequency axis to a single bin (it does for 80 bins), which
            ``forward`` checks.
    """

    def __init__(self, input_channels=1, input_freq_bins=80):
        super(SpecRNet, self).__init__()
        self.input_freq_bins = input_freq_bins
        # Preliminary normalization (BatchNorm + SELU activation)
        self.pre_bn = nn.BatchNorm2d(input_channels)
        self.pre_act = nn.SELU(inplace=True)
        # Residual blocks; the first one consumes the input channels directly, so its
        # width must follow input_channels rather than a fixed value.
        self.res1 = ResidualBlock(in_channels=input_channels, out_channels=20, first_block=True)
        self.res2 = ResidualBlock(in_channels=20, out_channels=64, first_block=False)
        self.res3 = ResidualBlock(in_channels=64, out_channels=64, first_block=False)
        # FMS attention blocks after each residual
        self.fms1 = FMSAttention(channels=20)
        self.fms2 = FMSAttention(channels=64)
        self.fms3 = FMSAttention(channels=64)
        # Pooling layers: after each resblock, do maxpool -> FMS -> maxpool.
        # Block1: freq 80 -> 20 and time -> time/4, using two 2x2 pools.
        self.pool1a = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2), ceil_mode=True)
        self.pool1b = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2), ceil_mode=True)
        # Block2: freq 20 -> 5 and time -> time/4, using two 2x2 pools.
        self.pool2a = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2), ceil_mode=True)
        self.pool2b = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2), ceil_mode=True)
        # Block3: freq 5 -> 1 and time -> time/4, using 2x2 then 3x2 pooling.
        self.pool3a = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2), ceil_mode=True)
        self.pool3b = nn.MaxPool2d(kernel_size=(3,2), stride=(3,2), ceil_mode=True)
        # Pre-recurrent normalization (BatchNorm + SELU for the final 64-channel map)
        self.post_bn = nn.BatchNorm2d(64)
        self.post_act = nn.SELU(inplace=True)
        # Recurrent layers: 2-layer bidirectional GRU (hidden size 64 per direction)
        self.gru = nn.GRU(input_size=64, hidden_size=64, num_layers=2, batch_first=True, bidirectional=True)
        # Classification head: two fully-connected layers (128 -> 128 -> 1)
        self.fc1 = nn.Linear(128, 128)
        self.fc2 = nn.Linear(128, 1)
        nn.init.xavier_uniform_(self.fc1.weight); nn.init.constant_(self.fc1.bias, 0.0)
        nn.init.xavier_uniform_(self.fc2.weight); nn.init.constant_(self.fc2.bias, 0.0)

    def forward(self, x):
        """Return per-sample probabilities of shape (B,) for input (B, C, F, N).

        Raises:
            ValueError: the pooling stack did not reduce the frequency axis to 1.
        """
        # Preliminary normalization
        x = self.pre_bn(x)
        x = self.pre_act(x)                        # -> (B, C, F, N)
        # Residual block 1
        x = self.res1(x)                           # -> (B, 20, F, N)
        x = self.pool1a(x)                         # first pooling (downsample)
        x = self.fms1(x)                           # FMS attention scaling
        x = self.pool1b(x)                         # second pooling
        # Residual block 2
        x = self.res2(x)                           # -> (B, 64, F/4, N/4)
        x = self.pool2a(x)
        x = self.fms2(x)
        x = self.pool2b(x)
        # Residual block 3
        x = self.res3(x)                           # -> (B, 64, F/16, N/16)
        x = self.pool3a(x)
        x = self.fms3(x)
        x = self.pool3b(x)                         # -> (B, 64, 1, N/64) for F = 80
        # Pre-recurrent BN + activation
        x = self.post_bn(x)
        x = self.post_act(x)                       # shape remains (B, 64, 1, T')
        # The GRU needs a (B, T', 64) sequence, which requires a single frequency bin.
        # Fail with a clear message instead of a shape error deep inside the GRU.
        if x.size(2) != 1:
            raise ValueError(
                f"SpecRNet expects the frequency axis to pool down to 1 bin, got {x.size(2)}; "
                "check the number of input frequency bins"
            )
        x = x.squeeze(2)                           # -> (B, 64, T')
        x = x.permute(0, 2, 1)                     # -> (B, T', 64) sequence of 64-dim vectors
        # Bi-GRU processing
        gru_out, gru_h = self.gru(x)               # gru_out: (B, T', 128), gru_h: (4, B, 64) for 2 layers * 2 directions
        # Final hidden states of the last GRU layer:
        # gru_h[-2] is the forward direction, gru_h[-1] the backward direction
        h_forward = gru_h[-2]                      # (B, 64)
        h_backward = gru_h[-1]                     # (B, 64)
        h_final = torch.cat([h_forward, h_backward], dim=1)  # (B, 128)
        # Fully connected layers for classification
        x = F.relu(self.fc1(h_final))              # (B, 128)
        logit = self.fc2(x).squeeze(1)             # (B,) - raw score
        prob = torch.sigmoid(logit)                # probability in [0,1]
        return prob  # probabilities, so pair with BCELoss rather than BCEWithLogitsLoss

In [None]:
import torch
import torch.optim as optim

# Initialize model, loss, optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SpecRNet(input_channels=1, input_freq_bins=80)
model = model.to(device)
criterion = nn.BCELoss()  # use BCE loss since model outputs probability
optimizer = optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 10
best_val_loss = float('inf')
for epoch in range(1, num_epochs+1):
    # ---- Training ----
    model.train()
    running_loss = 0.0
    train_seen = 0  # samples actually processed this epoch
    for features, labels in train_loader:
        features = features.to(device)
        labels = labels.to(device).float()  # BCELoss expects float labels for probabilities
        optimizer.zero_grad()
        outputs = model(features)            # forward pass -> outputs shape (B,) probability
        loss = criterion(outputs, labels)    # compute binary cross-entropy loss
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * features.size(0)
        train_seen += features.size(0)
    # Average over the samples that were seen: the training loader drops its last
    # partial batch, so len(train_loader.dataset) would overstate the denominator.
    avg_train_loss = running_loss / max(train_seen, 1)

    # ---- Validation ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for features, labels in val_loader:
            features = features.to(device)
            labels = labels.to(device).float()
            outputs = model(features)
            # Compute loss
            loss = criterion(outputs, labels)
            val_loss += loss.item() * features.size(0)
            # Compute accuracy
            preds = (outputs >= 0.5).long()      # threshold at 0.5
            correct += (preds == labels.long()).sum().item()
            total += features.size(0)
    avg_val_loss = val_loss / max(total, 1)
    val_accuracy = correct / max(total, 1) * 100.0
    print(f"Epoch {epoch}: Train Loss = {avg_train_loss:.4f}, Val Loss = {avg_val_loss:.4f}, Val Acc = {val_accuracy:.2f}%")
    # Save best model (lowest validation loss so far)
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), "best_specrnet.pth")
        print("  (Best model saved)")

Epoch 1: Train Loss = 0.0927, Val Loss = 0.0134, Val Acc = 99.25%
  (Best model saved)
Epoch 2: Train Loss = 0.0373, Val Loss = 0.0790, Val Acc = 97.24%
Epoch 3: Train Loss = 0.0343, Val Loss = 0.0004, Val Acc = 100.00%
  (Best model saved)
Epoch 4: Train Loss = 0.0060, Val Loss = 1.3479, Val Acc = 74.62%
Epoch 5: Train Loss = 0.0032, Val Loss = 0.0146, Val Acc = 99.50%
Epoch 6: Train Loss = 0.0057, Val Loss = 0.0002, Val Acc = 100.00%
  (Best model saved)
Epoch 7: Train Loss = 0.0080, Val Loss = 0.4645, Val Acc = 82.66%
Epoch 8: Train Loss = 0.0018, Val Loss = 0.0020, Val Acc = 99.75%
Epoch 9: Train Loss = 0.0069, Val Loss = 0.0250, Val Acc = 99.50%
Epoch 10: Train Loss = 0.0021, Val Loss = 0.0204, Val Acc = 99.75%


In [None]:
# Evaluation on test set
from sklearn.metrics import confusion_matrix, roc_auc_score

test_dataset = VCTKDeepfakeDataset(data_dir, speakers, mode='test')
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Take the device from the model itself rather than from a loop variable left over
# from the training cell, so this cell also works when training was not run in this session.
eval_device = next(model.parameters()).device
model.load_state_dict(torch.load("best_specrnet.pth", map_location=eval_device))
model.eval()

all_preds = []
all_labels = []
with torch.no_grad():
    for features, labels in test_loader:
        features = features.to(eval_device)
        outputs = model(features)            # probabilities
        all_preds.extend(outputs.cpu().numpy().tolist())
        all_labels.extend(labels.numpy().tolist())

# Compute accuracy at a 0.5 decision threshold
pred_labels = [1 if p >= 0.5 else 0 for p in all_preds]
accuracy = sum(1 for pl, tl in zip(pred_labels, all_labels) if pl == tl) / len(all_labels)
print(f"Test Accuracy: {accuracy*100:.2f}%")

# Compute confusion matrix
cm = confusion_matrix(all_labels, pred_labels, labels=[0,1])
print("Confusion Matrix [[TN, FP],[FN, TP]]:\n", cm)
# Compute ROC AUC from the raw probabilities
auc = roc_auc_score(all_labels, all_preds)
print(f"ROC AUC: {auc:.3f}")

Test Accuracy: 100.00%
Confusion Matrix [[TN, FP],[FN, TP]]:
 [[201   0]
 [  0 201]]
ROC AUC: 1.000
