In [42]:
!wget https://raw.githubusercontent.com/Dewjy02/HPC-ASSIGNMENT/4b9d288c78dad6d69fe56189a7557b39456414c7/passCrackCuda/generate_hash.c -O generate_hash.c

--2025-10-22 15:56:34--  https://raw.githubusercontent.com/Dewjy02/HPC-ASSIGNMENT/4b9d288c78dad6d69fe56189a7557b39456414c7/passCrackCuda/generate_hash.c
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1219 (1.2K) [text/plain]
Saving to: ‘generate_hash.c’


2025-10-22 15:56:34 (86.7 MB/s) - ‘generate_hash.c’ saved [1219/1219]



In [48]:
%%writefile cuda_crack.cu
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

// Encrypts a 4-character candidate (rawPassword[0..3]) into a 10-character
// string plus a terminating '\0' written to outEnc (11 bytes in total).
//
// Output positions 0-5 are derived from rawPassword[0] and rawPassword[1] and
// are wrapped into 'a'..'z'; positions 6-9 are derived from rawPassword[2] and
// rawPassword[3] and are wrapped into '0'..'9'. The wrap is applied at most
// once per position.
__device__ void deviceCrypt(const char* rawPassword, char* outEnc){
    // For output position k: which input character is used and by how much
    // it is shifted.
    const int sourceIndex[10] = {0, 0, 0, 1, 1, 1, 2, 2, 3, 3};
    const int shiftAmount[10] = {3, -2, 1, 1, -2, -3, 1, -2, 4, -3};

    // Pass 1: apply the raw shifts, in output order.
    for(int k = 0; k < 10; ++k){
        outEnc[k] = rawPassword[sourceIndex[k]] + shiftAmount[k];
    }
    outEnc[10] = '\0';

    // Pass 2: fold anything that left its alphabet back into range.
    for(int k = 0; k < 10; ++k){
        const bool letterSlot = (k < 6);
        const int low  = letterSlot ? 97  : 48;   // 'a' or '0'
        const int high = letterSlot ? 122 : 57;   // 'z' or '9'

        if(outEnc[k] > high){
            // overshoot past the top re-enters from the bottom
            outEnc[k] = (outEnc[k] - high) + low - 1;
        } else if(outEnc[k] < low){
            // undershoot below the bottom re-enters from the top
            outEnc[k] = high - (low - outEnc[k] - 1);
        }
    }
}

// Returns true when the first 10 characters of a and b are identical.
// Characters from index 10 onward (including any terminator) are never read.
__device__ bool encMatch(const char* a, const char* b){
    int pos = 0;
    // advance while the strings agree; stop at the first difference
    while(pos < 10 && a[pos] == b[pos]){
        ++pos;
    }
    return pos == 10;
}

// Brute-force search over every "two lowercase letters + two digits" password.
//
// Candidates are numbered idx = ((l0 * 26 + l1) * 10 + d0) * 10 + d1, where
// l0/l1 select 'a'..'z' and d0/d1 select '0'..'9' (26*26*10*10 candidates).
//
// Parameters:
//   targetEnc         - encrypted password to match; its first 10 chars are read.
//   resultRaw         - receives the 4-char password plus '\0' (5 bytes); written
//                       only by the single thread that wins the atomicCAS below.
//   foundFlag         - must be 0 at launch; set to 1 by the first thread that
//                       finds a match.
//   totalCombinations - number of candidate indices to search. Values above the
//                       key-space size are clamped, because larger indices would
//                       decode to characters outside a-z / 0-9.
//
// Launch layout: 1D grid of 1D blocks (only .x is used). The grid-stride loop
// makes the kernel correct for any grid size, including grids smaller than the
// number of candidates.
__global__ void crackKernel(const char* targetEnc, char* resultRaw, int* foundFlag, unsigned long long totalCombinations){
    const unsigned long long digitPairs     = 10ULL * 10ULL;          // d0,d1 combinations
    const unsigned long long perFirstLetter = 26ULL * digitPairs;     // candidates per l0
    const unsigned long long keySpace       = 26ULL * perFirstLetter; // 67600

    // Never decode an index that does not correspond to a real candidate.
    const unsigned long long limit = (totalCombinations < keySpace) ? totalCombinations : keySpace;
    const unsigned long long stride = (unsigned long long)gridDim.x * blockDim.x;

    for(unsigned long long idx = (unsigned long long)blockIdx.x * blockDim.x + threadIdx.x;
        idx < limit;
        idx += stride){

        // Early exit once another thread has claimed the result. A volatile
        // load is enough for this advisory check and avoids issuing an atomic
        // read-modify-write on one shared address from every thread.
        if(*((volatile int*)foundFlag) != 0) return;

        // Decode idx into its four "digits" (base 26, 26, 10, 10).
        const unsigned long long l0   = idx / perFirstLetter;  // 0..25
        const unsigned long long rest = idx % perFirstLetter;
        const unsigned long long l1   = rest / digitPairs;     // 0..25
        const unsigned long long tail = rest % digitPairs;
        const unsigned long long d0   = tail / 10ULL;          // 0..9
        const unsigned long long d1   = tail % 10ULL;          // 0..9

        char raw[5];
        raw[0] = (char)('a' + (int)l0);
        raw[1] = (char)('a' + (int)l1);
        raw[2] = (char)('0' + (int)d0);
        raw[3] = (char)('0' + (int)d1);
        raw[4] = '\0';

        char enc[11];
        deviceCrypt(raw, enc);

        if(encMatch(enc, targetEnc)){
            // try to claim the result (only the first claimer writes)
            if(atomicCAS(foundFlag, 0, 1) == 0){
                // copy raw password to resultRaw (5 bytes including null)
                for(int i = 0; i < 5; i++) resultRaw[i] = raw[i];
            }
            return; // this thread has nothing further to search for
        }
    }
}

// Aborts the program when a CUDA runtime call failed.
// On any status other than cudaSuccess, prints "<msg>: <CUDA error string>"
// to stderr and terminates with EXIT_FAILURE; otherwise returns normally.
void checkCudaErr(cudaError_t err, const char* msg){
    if(err == cudaSuccess){
        return; // nothing to report
    }

    const char* reason = cudaGetErrorString(err);
    fprintf(stderr, "%s: %s\n", msg, reason);
    exit(EXIT_FAILURE);
}

// Entry point: cracks the 10-character encrypted password given as argv[1].
//
// Usage: <program> <encrypted_hash>
// Copies the target to the GPU, launches crackKernel over all
// 26*26*10*10 candidate passwords, and prints either the recovered password
// or a "not found" message. Returns 0 on a completed search (found or not)
// and EXIT_FAILURE on bad arguments; any CUDA failure terminates the process
// through checkCudaErr.
int main(int argc, char** argv){
    // target encrypted password must be 10 characters
    const size_t encLen = 10;
    char hostTargetEnc[11];

    if(argc < 2){
        fprintf(stderr, "Error: No encrypted password provided.\n");
        // argv[0] is not guaranteed to be set when argc == 0
        fprintf(stderr, "Usage: %s <encrypted_hash>\n", (argc > 0 && argv[0]) ? argv[0] : "cuda_crack");
        return EXIT_FAILURE;
    }

    //check the argument length
    size_t len = strlen(argv[1]);
    if(len < encLen){
        fprintf(stderr, "Error: Provided encrypted password is too short. Expecting 10 characters.\n");
        return EXIT_FAILURE;
    }
    if(len > encLen){
        // Only the first 10 characters are ever compared; say so instead of
        // truncating silently.
        fprintf(stderr, "Warning: Provided encrypted password is longer than 10 characters; extra characters are ignored.\n");
    }

    //argument is valid, copy it into the fixed-size host buffer
    memcpy(hostTargetEnc, argv[1], encLen);
    hostTargetEnc[encLen] = '\0';
    printf("Target encrypted password: %s\n", hostTargetEnc);

    // prepare device copies
    char* d_targetEnc = NULL;
    char* d_resultRaw = NULL;
    int* d_found = NULL;

    checkCudaErr(cudaMalloc((void**)&d_targetEnc, 11 * sizeof(char)), "cudaMalloc targetEnc");
    checkCudaErr(cudaMalloc((void**)&d_resultRaw, 5 * sizeof(char)), "cudaMalloc resultRaw");
    checkCudaErr(cudaMalloc((void**)&d_found, sizeof(int)), "cudaMalloc found flag");

    checkCudaErr(cudaMemcpy(d_targetEnc, hostTargetEnc, 11 * sizeof(char), cudaMemcpyHostToDevice), "cudaMemcpy targetEnc to device");
    // the kernel requires the found flag to start at 0
    int zero = 0;
    checkCudaErr(cudaMemcpy(d_found, &zero, sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy found init");

    unsigned long long total = 26ULL * 26ULL * 10ULL * 10ULL; // 67600
    int threadsPerBlock = 256;
    // ceil-div so the grid covers every candidate
    unsigned long long blocks = (total + threadsPerBlock - 1ULL) / threadsPerBlock;
    if(blocks < 2) blocks = 2; // ensure more than one block as required by assessment

    printf("Launching kernel with %llu blocks x %d threads (total threads >= %llu).\n", (unsigned long long)blocks, threadsPerBlock, total);

    crackKernel<<<(int)blocks, threadsPerBlock>>>(d_targetEnc, d_resultRaw, d_found, total);
    // launch-configuration errors are only reported through cudaGetLastError
    checkCudaErr(cudaGetLastError(), "Kernel launch error");

    // wait for GPU to finish; also surfaces errors raised while the kernel ran
    checkCudaErr(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    int hostFound = 0;
    checkCudaErr(cudaMemcpy(&hostFound, d_found, sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy found to host");

    if(hostFound){
        char hostResult[5];
        checkCudaErr(cudaMemcpy(hostResult, d_resultRaw, 5 * sizeof(char), cudaMemcpyDeviceToHost), "cudaMemcpy resultRaw to host");
        hostResult[4] = '\0'; // defensive: guarantee termination before printing
        printf("Password found: %s\n", hostResult);
    } else {
        printf("Password was NOT found.\n");
    }

    // free memory
    checkCudaErr(cudaFree(d_targetEnc), "cudaFree targetEnc");
    checkCudaErr(cudaFree(d_resultRaw), "cudaFree resultRaw");
    checkCudaErr(cudaFree(d_found), "cudaFree found flag");

    return 0;
}

Overwriting cuda_crack.cu


In [44]:
!nvcc -o cuda_crack cuda_crack.cu \
  -gencode=arch=compute_50,code=sm_50 \
  -gencode=arch=compute_60,code=sm_60 \
  -gencode=arch=compute_61,code=sm_61 \
  -gencode=arch=compute_70,code=sm_70 \
  -gencode=arch=compute_75,code=sm_75 \
  -gencode=arch=compute_80,code=sm_80 \
  -gencode=arch=compute_86,code=sm_86


In [45]:
!gcc -o generate_hash generate_hash.c

In [46]:
!./generate_hash at42

dyburq5269


In [47]:
%%bash
./cuda_crack dyburq5269

Target encrypted password: dyburq5269
Launching kernel with 265 blocks x 256 threads (total threads >= 67600).
Password found: at42
