# Installing Libraries

In [None]:
!pip install pycuda

Collecting pycuda
  Downloading pycuda-2024.1.tar.gz (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2024.1.2-py2.py3-none-any.whl (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.6/85.6 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
Collecting mako (from pycuda)
  Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: pycuda
  Building wheel for pycuda (pyproject.toml) ... [?25l[?25hdone
  Created wheel for pycuda: filename=pycuda-2024.1-cp310-cp310-linux_x86_64.whl size=661204 sha256=45b7ddb

# Importing Libraries

In [None]:
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import numpy as np
import itertools
import random

# Traditional RC4

In [None]:
# CUDA kernel for the RC4 brute force attack
cuda_code = """
__global__ void rc4_brute_force_kernel(unsigned char *keys, unsigned char *outputs, int key_length, int output_length, int num_keys) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx >= num_keys) return;

    unsigned char S[256];
    int j = 0;

    // Initialize S array
    for (int i = 0; i < 256; i++) {
        S[i] = i;
    }

    // Key scheduling algorithm (KSA)
    for (int i = 0; i < 256; i++) {
        j = (j + S[i] + keys[idx * key_length + i % key_length]) % 256;
        unsigned char temp = S[i];
        S[i] = S[j];
        S[j] = temp;
    }

    int i = 0;
    j = 0;

    // Pseudo-random generation algorithm (PRGA)
    for (int k = 0; k < output_length; k++) {
        i = (i + 1) % 256;
        j = (j + S[i]) % 256;
        unsigned char temp = S[i];
        S[i] = S[j];
        S[j] = temp;
        unsigned char K = S[(S[i] + S[j]) % 256];
        outputs[idx * output_length + k] = K;
    }
}
"""

In [None]:
# Compile the CUDA code
mod = SourceModule(cuda_code)
rc4_kernel = mod.get_function("rc4_brute_force_kernel")

In [None]:
def rc4_encrypt(key, plaintext):
    S = list(range(256))
    j = 0
    out = []

    # Key Scheduling Algorithm (KSA)
    for i in range(256):
        j = (j + S[i] + key[i % len(key)]) % 256
        S[i], S[j] = S[j], S[i]

    i = j = 0
    # Pseudo-random Generation Algorithm (PRGA)
    for char in plaintext:
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        K = S[(S[i] + S[j]) % 256]
        out.append(char ^ K)
    return out

In [None]:
def simulate_encryption(key, plaintext):
    key_bytes = [ord(c) for c in key]  # Convert key to bytes
    plaintext_bytes = [ord(c) for c in plaintext]  # Convert plaintext to bytes
    encrypted = rc4_encrypt(key_bytes, plaintext_bytes)
    encrypted_str = ''.join([chr(e) for e in encrypted])  # Convert encrypted bytes to string
    return key, encrypted, encrypted_str

In [None]:
def brute_force_gpu(known_output, plaintext_length, key_length, plaintext):
    num_keys = 256 ** key_length
    keys = np.array(list(itertools.product(range(256), repeat=key_length)), dtype=np.uint8).reshape(-1)
    outputs = np.zeros(num_keys * plaintext_length, dtype=np.uint8)

    # Create CUDA events for timing
    start = cuda.Event()
    end = cuda.Event()

    # Record the start time
    start.record()

    # Use smaller grid and block sizes for testing to avoid illegal memory access
    block_size = 256
    grid_size = (num_keys + block_size - 1) // block_size

    rc4_kernel(cuda.In(keys), cuda.Out(outputs), np.int32(key_length), np.int32(plaintext_length), np.int32(num_keys), block=(block_size,1,1), grid=(grid_size,1))

    # Record the end time
    end.record()
    end.synchronize()

    # Calculate the elapsed time
    time_taken = start.time_till(end)
    print(f"Time taken: {time_taken} milliseconds")

    for i in range(num_keys):
        output_slice = outputs[i * plaintext_length:(i + 1) * plaintext_length]
        decrypted_text = ''.join([chr(output_slice[j] ^ known_output[j]) for j in range(plaintext_length)])
        if decrypted_text == plaintext:
            found_key_bytes = keys[i * key_length:(i + 1) * key_length]
            found_key_str = ''.join([chr(byte) for byte in found_key_bytes])
            print("Key found:", found_key_str)
            return found_key_str, decrypted_text

    print("No key found")
    return None, None

In [None]:
key = "see"
plaintext = "hello"
key_used, encrypted_text, encrypted_str = simulate_encryption(key, plaintext)
print("Actual Key Used:", key_used)
print("Encrypted Text (array):", encrypted_text)
print("Encrypted Text (string):", encrypted_str)

# Brute force using GPU
found_key, decrypted_text = brute_force_gpu(encrypted_text, len(plaintext), len(key), plaintext)
if found_key is not None:
    print("Decrypted Text:", decrypted_text)
else:
    print("Failed to decrypt the text")


Actual Key Used: see
Encrypted Text (array): [63, 139, 42, 228, 120]
Encrypted Text (string): ?*äx
Time taken: 4605.22412109375 milliseconds
Key found: see
Decrypted Text: hello


# Enhanced RC4


In [None]:
cuda_code = """
__global__ void rc4_ncksa_kernel(unsigned char *keys, unsigned char *outputs, int key_length, int output_length, int num_keys) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx >= num_keys) return;

    unsigned char S[256];
    int j = 0, k = 0;

    // Initialize S array
    for (int i = 0; i < 256; i++) {
        S[i] = i;
    }

    // Non-linear Conditional Key Scheduling Algorithm (NCKSA)
    for (int i = 0; i < 256; i++) {
        j = (j + S[i] + keys[idx * key_length + i % key_length]) % 256;
        k = (keys[idx * key_length + i % key_length] & S[i]) | (keys[(i + 1) % key_length] ^ S[j]);  // Non-linear operation

        int non_linear_transform = (k * S[i] + keys[(i + 2) % key_length]) % 256;
        unsigned char temp = S[i];
        S[i] = S[non_linear_transform];
        S[non_linear_transform] = temp;

        int non_linear_transform_2 = (k ^ S[j]) + keys[(i + 3) % key_length];
        temp = S[i];
        S[i] = S[non_linear_transform_2];
        S[non_linear_transform_2] = temp;

        if (k % 3 == 0) {  // Conditional variability
            int swapIndex = (j * k + 1) % 256;
            unsigned char temp = S[i];
            S[i] = S[swapIndex];
            S[swapIndex] = temp;
        } else {
            unsigned char temp = S[i];
            S[i] = S[j];
            S[j] = temp;
        }
    }

    int i = 0;
    j = 0;
    unsigned char last_output = 0; // Add last_output to provide feedback

    // Enhanced Pseudo-random generation algorithm (PRGA)
    for (int k = 0; k < output_length; k++) {
        i = (i + 1) % 256;
        j = (j + S[i] + last_output) % 256;  // Incorporate last output in the update of j
        unsigned char temp = S[i];
        S[i] = S[j];
        S[j] = temp;
        last_output = S[(S[i] + S[j]) % 256];  // Update last_output with current output
        outputs[idx * output_length + k] = last_output;
    }
}

"""


In [None]:
# Compile the CUDA code
mod = SourceModule(cuda_code)
rc4_kernel = mod.get_function("rc4_ncksa_kernel")

In [None]:
def generate_random_key(length):
    return [random.randint(0, 255) for _ in range(length)]

In [None]:
def simulate_encryption(key_length):
    key = generate_random_key(key_length)
    plaintext = np.zeros(10, dtype=np.uint8)  # Simple plaintext
    encrypted = np.zeros_like(plaintext)
    key_np = np.array(key, dtype=np.uint8)
    rc4_kernel(cuda.In(key_np), cuda.Out(encrypted), np.int32(key_length), np.int32(len(plaintext)), np.int32(1), block=(256,1,1), grid=(1,1))
    return key, encrypted.tolist()

In [None]:
def brute_force_gpu(known_output, key_length):
    num_keys = 256 ** key_length
    keys = np.array(list(itertools.product(range(256), repeat=key_length)), dtype=np.uint8).reshape(-1)
    outputs = np.zeros(num_keys * len(known_output), dtype=np.uint8)

    # Create CUDA events for timing
    start = cuda.Event()
    end = cuda.Event()

    # Record the start time
    start.record()

    rc4_kernel(cuda.In(keys), cuda.Out(outputs), np.int32(key_length), np.int32(len(known_output)), np.int32(num_keys), block=(256,1,1), grid=((num_keys + 255) // 256,1))

    # Record the end time
    end.record()
    end.synchronize()

    # Calculate the elapsed time
    time_taken = start.time_till(end)
    print(f"Time taken: {time_taken} milliseconds")

    for i in range(num_keys):
        output_slice = outputs[i * len(known_output):(i + 1) * len(known_output)]
        if np.array_equal(output_slice, known_output):
            print("Key found:", keys[i * key_length:(i + 1) * key_length])
            return keys[i * key_length:(i + 1) * key_length]

    print("No key found")
    return None

In [None]:
key_length = 3
key_used, known_output = simulate_encryption(key_length)
print("Actual Key Used:", key_used)
print("Known Output (for testing):", known_output)

# Brute force using GPU
brute_force_gpu(known_output, key_length)


Actual Key Used: [106, 18, 197]
Known Output (for testing): [167, 93, 228, 197, 0, 114, 62, 12, 131, 109]


  globals().clear()


Time taken: 11368.88671875 milliseconds
No key found
