**Setting Up CUDA**

In [1]:
!apt-get --purge remove cuda nvidia* libnvidia-*
!dpkg -l | grep cuda- | awk '{print $2}' | xargs -n1 dpkg --purge
!apt-get remove cuda-*
!apt autoremove
!apt-get update

Reading package lists... Done
Building dependency tree       
Reading state information... Done
Note, selecting 'nvidia-kernel-common-418-server' for glob 'nvidia*'
Note, selecting 'nvidia-325-updates' for glob 'nvidia*'
Note, selecting 'nvidia-346-updates' for glob 'nvidia*'
Note, selecting 'nvidia-driver-binary' for glob 'nvidia*'
Note, selecting 'nvidia-331-dev' for glob 'nvidia*'
Note, selecting 'nvidia-304-updates-dev' for glob 'nvidia*'
Note, selecting 'nvidia-compute-utils-418-server' for glob 'nvidia*'
Note, selecting 'nvidia-384-dev' for glob 'nvidia*'
Note, selecting 'nvidia-libopencl1-346-updates' for glob 'nvidia*'
Note, selecting 'nvidia-fs-prebuilt' for glob 'nvidia*'
Note, selecting 'nvidia-driver-440-server' for glob 'nvidia*'
Note, selecting 'nvidia-340-updates-uvm' for glob 'nvidia*'
Note, selecting 'nvidia-dkms-450-server' for glob 'nvidia*'
Note, selecting 'nvidia-kernel-common' for glob 'nvidia*'
Note, selecting 'nvidia-kernel-source-440-server' for glob 'nvidia*'


In [2]:
!wget https://developer.nvidia.com/compute/cuda/9.2/Prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64 -O cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb
!dpkg -i cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb
!apt-key add /var/cuda-repo-9-2-local/7fa2af80.pub
!apt-get update
!apt-get install cuda-9.2

--2021-11-27 19:41:35--  https://developer.nvidia.com/compute/cuda/9.2/Prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64
Resolving developer.nvidia.com (developer.nvidia.com)... 152.199.39.144
Connecting to developer.nvidia.com (developer.nvidia.com)|152.199.39.144|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://developer.nvidia.com/compute/cuda/9.2/prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64 [following]
--2021-11-27 19:41:36--  https://developer.nvidia.com/compute/cuda/9.2/prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64
Reusing existing connection to developer.nvidia.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://developer.download.nvidia.com/compute/cuda/9.2/secure/Prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb?A5T9E7W6wM1XMdsPd5ckgRFbhtk4CrFFHDGytUxEuIybLBYS2RsvtfHv8xAf4LIMYLINLms9x7p8HMqOlfDu_lylGnshCZhM-srtiVeYB2lT7

In [3]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2018 NVIDIA Corporation
Built on Wed_Apr_11_23:16:29_CDT_2018
Cuda compilation tools, release 9.2, V9.2.88


In [4]:
!pip install git+git://github.com/andreinechaev/nvcc4jupyter.git

Collecting git+git://github.com/andreinechaev/nvcc4jupyter.git
  Cloning git://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-19epbhtj
  Running command git clone -q git://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-19epbhtj
Building wheels for collected packages: NVCCPlugin
  Building wheel for NVCCPlugin (setup.py) ... [?25l[?25hdone
  Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-py3-none-any.whl size=4305 sha256=83ffb6397ab2d37a0665566346f35df68264f4d248f9b1adf811c6c851f22e45
  Stored in directory: /tmp/pip-ephem-wheel-cache-qrirqw03/wheels/c5/2b/c0/87008e795a14bbcdfc7c846a00d06981916331eb980b6c8bdf
Successfully built NVCCPlugin
Installing collected packages: NVCCPlugin
Successfully installed NVCCPlugin-0.0.2


In [5]:
%load_ext nvcc_plugin

created output directory at /content/src
Out bin /content/result.out


**STEGANOGRAPHY**

**1)Parallel Encryption ->CUDA**

In [6]:
%%writefile parallel_encrypt.cu
#include "/content/imgh.h"
#include "/content/audh.h"
#include <immintrin.h>
#include <iostream>
#include <fstream>
#include <cmath>
#include <functional>
#include <chrono>
#include <cassert>
#include <random>
#include <omp.h>
const std::string pendl = ".\n";
#include <bits/stdc++.h>
using namespace std;

//#define x 512

__global__
void encrypt(unsigned char * inputImageData, unsigned char * outputImageData, int width, int height,
        char * audioData, long long audioSize) {
    long long index = blockIdx.x * blockDim.x + threadIdx.x;   //blockDim.x->no of thread in the block

    if(index < audioSize){
        unsigned char audioByte = (unsigned char)audioData[index];
        for(size_t j = 0; j < 8; j++) {
				size_t index1 = (8 * index) + j;
        outputImageData[index1]=(inputImageData[index1] | 1) & (254 + ((audioData[index] >> (7 - j) & 1)));
			}
  }
  
}


int main(int argc, char *argv[]){

    char *inputImageFile = "/content/img.ppm";
    char *inputAudioFile = "/content/a.mp3";

  std::ofstream myfile;
  myfile.open ("CUDAEncryptVaryingThreads.csv");


int THREADS_PER_BLOCK=512;
while(THREADS_PER_BLOCK<=512)
{
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    PPMimg *inpImg = readPPM(inputImageFile);
    int width = inpImg->width;
    int height = inpImg->height;
    long long totPixels = (long long)width * height;
    PPMpixel *inData = inpImg->data;
    PPMpixel *outData = (PPMpixel *)malloc(sizeof(PPMpixel) * totPixels);//size of one pixel * total pixels
    unsigned char * inputImageData = ppmTochar(inData, width, height); //converting arrays of pixels to linear array
    unsigned char * outputImageData = (unsigned char *)malloc(totPixels * 3ll);
    unsigned char *d_inputImageData, *d_outputImageData;
    cudaMalloc((void**)&d_inputImageData, totPixels * 3ll);
    cudaMalloc((void**)&d_outputImageData, totPixels * 3ll);
    cudaMemcpy(d_inputImageData,inputImageData,totPixels * 3ll,cudaMemcpyHostToDevice);
    cudaMemcpy(d_outputImageData,inputImageData,totPixels * 3ll,cudaMemcpyHostToDevice);    
    MP3File *inpAudio = readMP3(inputAudioFile);
    char *audioData = inpAudio->data;
    char *d_audioData;
    cudaMalloc((void**)&d_audioData, inpAudio->size);
    cudaMemcpy(d_audioData, audioData, inpAudio->size, cudaMemcpyHostToDevice);
    cout << "Size of text file = " << inpAudio->size << " bytes ("
         << (inpAudio->size * 8) << " bits)\n";
    cout << "Size of image file = " << totPixels * 3 << " bytes\n";

    long long audioSize = inpAudio -> size; //total chunks of 1 byte read
    dim3 blockDim(THREADS_PER_BLOCK, 1, 1);
    dim3 gridDim((audioSize-1)/THREADS_PER_BLOCK + 1, 1, 1); //(audioSize-1)/THREADS_PER_BLOCK + 1 ->to handle case when not divisible

    cout<<"Blocks = "<<(audioSize-1)/THREADS_PER_BLOCK + 1<<"\n";
    cudaEventRecord(start);        
    encrypt<<<blockDim, gridDim>>>(d_inputImageData, d_outputImageData, width, height, d_audioData, audioSize);
    cudaEventRecord(stop);
    cudaEventSynchronize(stop);//Wait until the completion of all device work preceding the most recent call to cudaEventRecord()
    float gpuTime = 0;
    cudaEventElapsedTime(&gpuTime, start, stop);
    cudaMemcpy(outputImageData, d_outputImageData, totPixels * 3 ,cudaMemcpyDeviceToHost);
    char outputImageFile[] = "/content/parallel_output.ppm";
    writePPM(outputImageFile, outputImageData, inpImg->width, inpImg->height, 3);
    free(audioData);
    cudaFree(d_inputImageData);
    cudaFree(d_outputImageData);
    cudaFree(d_audioData);
     myfile <<(gpuTime);
     myfile<<",";
     myfile<<(THREADS_PER_BLOCK);
     myfile <<endl;
     THREADS_PER_BLOCK++;
    cout<<"GPU Time taken (encrypt) = "<<gpuTime<<" ms\n";
 
}
}

Writing parallel_encrypt.cu


In [7]:
%%shell
nvcc parallel_encrypt.cu










In [8]:
%%shell
./a.out

Size of text file = 117490 bytes (939920 bits)
Size of image file = 2045424 bytes
Blocks = 230
GPU Time taken (encrypt) = 0.246688 ms




**2) Parallel Decryption ->CUDA**

In [20]:
%%writefile parallel_decrypt.cu
#include "/content/imgh.h"
#include "/content/audh.h"
#include <bits/stdc++.h>

using namespace std;



__global__
void decrypt(unsigned char * inputImageData, int width, int height,
        char * Data, long long Size) {
    long long index = blockIdx.x * blockDim.x + threadIdx.x;

    if(index < Size){
        unsigned char Byte = 0;
        for(size_t j = 0; j < 8; j++) {
             Byte |= (inputImageData[index * 8 + j] & 1) << (7-j);
        }
        Data[index] = Byte;
    }
}


int main(){

    char const *inputImageFile = "/content/parallel_output.ppm";
    long long audioSize = 117490;
    char *outputFileExtension = ".mp3";  

  std::ofstream myfile;
  myfile.open ("CUDADecryptVaryingThreads.csv");


int THREADS_PER_BLOCK=512;//can be varied from here 
while(THREADS_PER_BLOCK<=512)
{ 
    
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    PPMimg *inpImg = readPPM(inputImageFile);
    int width = inpImg->width;
    int height = inpImg->height;
    long long totPixels = (long long)width * height;
    PPMpixel *inData = inpImg->data;
    PPMpixel *outData = (PPMpixel *)malloc(sizeof(PPMpixel) * totPixels);
    unsigned char *inputImageData = ppmTochar(inData, width, height);
    unsigned char *d_inputImageData;
    cudaMalloc((void**)&d_inputImageData, totPixels * 3ll);
    cudaMemcpy(d_inputImageData,inputImageData,totPixels * 3ll,cudaMemcpyHostToDevice);
    char *extractedAudioData = (char *)malloc(audioSize);
    char *d_extractedAudioData;
    cudaMalloc((void**)&d_extractedAudioData, audioSize);

    // Invoke Kernel
    dim3 blockDim(THREADS_PER_BLOCK, 1, 1);
    dim3 gridDim((audioSize-1)/THREADS_PER_BLOCK + 1, 1, 1);
    cudaEventRecord(start);    
    decrypt<<<blockDim, gridDim>>>(d_inputImageData, width, height, d_extractedAudioData, audioSize);
    cudaEventRecord(stop);
    cudaEventSynchronize(stop);
    float gpuTime = 0;
    cudaEventElapsedTime(&gpuTime, start, stop);
    cudaMemcpy(extractedAudioData, d_extractedAudioData, audioSize ,cudaMemcpyDeviceToHost);
    char outputAudioFile[] = "parallel_output";
    strcat(outputAudioFile, outputFileExtension);
    writeMP3(outputAudioFile, extractedAudioData, audioSize);
    cout<<"GPU Time taken (decrypt) = "<<gpuTime<<" ms\n";
    myfile <<(gpuTime);
     myfile<<",";
     myfile<<(THREADS_PER_BLOCK);
     myfile <<endl;
     THREADS_PER_BLOCK++;
}
}

Overwriting parallel_decrypt.cu


In [21]:
%%shell
nvcc parallel_decrypt.cu







In [22]:
%%shell
./a.out

GPU Time taken (decrypt) = 0.220032 ms




**Serial Encryption**

In [41]:
%%writefile encrypt.cpp
#include "/content/imgh.h"
#include "/content/audh.h"
#include <immintrin.h>
#include <iostream>
#include <fstream>
#include <cmath>
#include <functional>
#include <chrono>
#include <cassert>
#include <random>
#include <omp.h>
const std::string pendl = ".\n";
#include <bits/stdc++.h>
using namespace std;


int main(int argc, char *argv[]){

    char *inputImageFile = "/content/img.ppm";
    char *inputAudioFile = "/content/a.mp3";

    //dealing with image
     PPMimg *inpImg = readPPM(inputImageFile);
    int width = inpImg->width;
    int height = inpImg->height;
    long long totPixels = (long long)width * height;

    PPMpixel *inData = inpImg->data;
    PPMpixel *outData = (PPMpixel *)malloc(sizeof(PPMpixel) * totPixels);//size of one pixel * total pixels
    unsigned char * outputImageData = (unsigned char *)malloc(totPixels * 3ll);
    unsigned char * inputImageData = ppmTochar(inData, width, height); //converting arrays of pixels to linear array
    


     // Read input audio file
    MP3File *inpAudio = readMP3(inputAudioFile);
    char *audioData = inpAudio->data;


//------------------------------------------------------------
    cout << "Size of  file = " << inpAudio->size << " bytes ("
         << (inpAudio->size * 8) << " bits)\n";
    cout << "Size of image file = " << totPixels * 3 << " bytes\n";

    long long audioSize = inpAudio -> size; //total chunks of 1 byte read

  if((inpAudio->size * 8)> totPixels * 3ll) {
			std::cout << "ERROR: message too large to encode in input image." << pendl;
			exit(1);
		}

    int num=1,max=1;


		std::cout << "=== Beginning encoding ===" << std::endl;
	clock_t startTime = clock();

		for(size_t i = 0; i <audioSize; ++i) {
			//loop through each character in the message string
			for(size_t j = 0; j < 8; j++) {
				size_t index = (8 * i) + j;
        inputImageData[index]=(inputImageData[index] | 1) & (254 + ((audioData[i] >> (7 - j) & 1)));
			}
		}

    	clock_t endTime = clock();
	
// Time calculation
  double Time = (double)(endTime - startTime) / CLOCKS_PER_SEC;
  cout << "Time taken (encrypt) = " << Time * 1000 << " ms\n";
		std::cout << "Message successfully encoded, writing to serial_output.ppm"  << pendl;
    char outputImageFile[] = "/content/serial_output.ppm";
    writePPM(outputImageFile,inputImageData, inpImg->width, inpImg->height, 3);
    num++;
}


Overwriting encrypt.cpp


In [42]:
%%shell
g++ encrypt.cpp 

[01m[Kencrypt.cpp:[m[K In function ‘[01m[Kint main(int, char**)[m[K’:
     char *inputImageFile = [01;35m[K"/content/img.ppm"[m[K;
                            [01;35m[K^~~~~~~~~~~~~~~~~~[m[K
     char *inputAudioFile = [01;35m[K"/content/a.mp3"[m[K;
                            [01;35m[K^~~~~~~~~~~~~~~~[m[K




In [43]:
%%shell
./a.out

Size of  file = 117490 bytes (939920 bits)
Size of image file = 2045424 bytes
=== Beginning encoding ===
Time taken (encrypt) = 3.438 ms
Message successfully encoded, writing to serial_output.ppm.




**Serial Decryption** 

In [44]:
%%writefile parallel_decrypt.cpp
#include "/content/imgh.h"
#include "/content/audh.h"
#include <immintrin.h>
#include <iostream>
#include <fstream>
#include <cmath>
#include <functional>
#include <chrono>
#include <cassert>
#include <random>
#include <omp.h>
const std::string pendl = ".\n";
#include <bits/stdc++.h>
using namespace std;
int main(int argc, char *argv[]){

char const *inputImageFile = "/content/serial_output.ppm";
    long long audioSize = 117490;
    char *outputFileExtension = ".mp3";    
    
    
    // Read input image
    PPMimg *inpImg = readPPM(inputImageFile);
    int width = inpImg->width;
    int height = inpImg->height;
    long long totPixels = (long long)width * height;

    PPMpixel *inData = inpImg->data;
    PPMpixel *outData = (PPMpixel *)malloc(sizeof(PPMpixel) * totPixels);
    unsigned char *inputImageData = ppmTochar(inData, width, height);


    char *Data = (char *)malloc(audioSize);




clock_t startTime = clock();

		for(size_t i = 0; i <audioSize; ++i) {
        size_t index = (8 * i);
        unsigned char Byte = 0;
        Byte |= (inputImageData[index  + 0] & 1) << 7;
        Byte |= (inputImageData[index  + 1] & 1) << 6;
        Byte |= (inputImageData[index  + 2] & 1) << 5;
        Byte |= (inputImageData[index  + 3] & 1) << 4;
        Byte |= (inputImageData[index  + 4] & 1) << 3;
        Byte |= (inputImageData[index  + 5] & 1) << 2;
        Byte |= (inputImageData[index  + 6] & 1) << 1;
        Byte |= (inputImageData[index  + 7] & 1) << 0;
        Data[i] = Byte;
    }

	clock_t endTime = clock();
	
      // Time calculation
  double Time = (double)(endTime - startTime) / CLOCKS_PER_SEC;
  cout << "Time taken (decrypt) = " << Time * 1000 << " ms\n";

    // Writing back audio file
    char outputAudioFile[] = "parallel_output";
    strcat(outputAudioFile, outputFileExtension);
    writeMP3(outputAudioFile, Data, audioSize);
    //--------------------------------------------------------------------------//
}

Writing parallel_decrypt.cpp


In [45]:
%%shell
g++ decrypt.cpp



In [46]:
%%shell
./a.out

CPU Time taken (decrypt) = 0.886 ms




# **------------------------------------------------------------------------------------------------**