In [None]:
%%writefile 1.cu

#include <stdio.h>

__constant__ float conversionRates[5];

enum CurrencyCode {INR =0, EUR = 1, GBP = 2, JPY = 3, AUD = 4};

__global__ void convertToUSD(float *output, const float *amount, const int *currencyIndices, int numTrans){
  int i = threadIdx.x + blockIdx.x * blockDim.x;
  if ( i < numTrans && currencyIndices[i] > INR && currencyIndices[i] <= AUD){
    output[i] = amount[i] / conversionRates[currencyIndices[i]];
  }
}

int main(){
  float h_conversionRates[5] = {84.0f, 0.95f, 0.78f, 130.0f, 1.5f};

  cudaMemcpyToSymbol(conversionRates, h_conversionRates, 5 * sizeof(float));

  float h_amounts[5] = {50000.0f, 1000.0f, 2000.0f, 75000.0f, 3500.0f};

  CurrencyCode h_currencyCodes[5] = {INR, EUR, GBP, JPY, AUD};

  int h_currencyIndices[5];
  for(int i=0; i<5; i++){
    h_currencyIndices[i] = (int)h_currencyCodes[i];
  }

  float *d_amounts, *d_output;
  int *d_currencyIndices;
  float h_output[5];

  cudaMalloc(&d_amounts, 5 * sizeof(float));
  cudaMalloc(&d_output, 5 * sizeof(float));
  cudaMalloc(&d_currencyIndices, 5 * sizeof(int));

  cudaMemcpy(d_amounts, h_amounts, 5 * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(d_currencyIndices, h_currencyIndices, 5 * sizeof(int), cudaMemcpyHostToDevice);

  int threadsPerBlock = 5;
  int blocksPerGrid = (5 + threadsPerBlock - 1) / threadsPerBlock;

  convertToUSD<<<blocksPerGrid, threadsPerBlock>>>(d_output, d_amounts, d_currencyIndices, 5);

  cudaMemcpy(h_output, d_output, 5 * sizeof(float), cudaMemcpyDeviceToHost);

  const char* currencyNames[] = {"INR", "EUR", "GBP", "JPY", "AUD"};

  for(int i=0; i<5; i++){
    printf("Transaction %d (%s %.2f): Equivalent USD amount = %.2f\n", i+1, currencyNames[h_currencyIndices[i]], h_amounts[i], h_output[i]);
  }

  cudaFree(d_amounts);
  cudaFree(d_output);
  cudaFree(d_currencyIndices);

  return 0;

}

Overwriting 1.cu


In [None]:
!nvcc -o 1 1.cu

In [None]:
!./1

Transaction 1 (INR 50000.00): Equivalent USD amount = 0.00
Transaction 2 (EUR 1000.00): Equivalent USD amount = 1052.63
Transaction 3 (GBP 2000.00): Equivalent USD amount = 2564.10
Transaction 4 (JPY 75000.00): Equivalent USD amount = 576.92
Transaction 5 (AUD 3500.00): Equivalent USD amount = 2333.33


## Shared Memory

In [None]:
%%writefile 2.cu

#include <stdio.h>
#define N 15

__global__ void sumWithSharedMem(int *d_input, int *d_output, int n){
  __shared__ int sdata[N];

  int idx = blockIdx.x * blockDim.x + threadIdx.x;

  if(idx < n){
    sdata[threadIdx.x] = d_input[idx];
  }else{
    sdata[threadIdx.x] = 0;
  }

  __syncthreads();

  if (threadIdx.x == 0){
    int blockSum =0;
    for(int i=0; i<N; i++){
      blockSum += sdata[i];
    }
    d_output[blockIdx.x] = blockSum;
  }
}

int main(){
  int h_input[N], h_output[N / 4], finalSum =0;
  int *d_input, *d_output;

  for(int i=0; i<N; i++){
    h_input[i] = 1;
  }

  cudaMalloc((void **) &d_input, N * sizeof(int));
  cudaMalloc((void **) &d_output, (N/4) * sizeof(int));

  cudaMemcpy(d_input, h_input, N *sizeof(int), cudaMemcpyHostToDevice);

  sumWithSharedMem<<<4, 4>>>(d_input, d_output, N);

  cudaMemcpy(h_output, d_output, (N/4) * sizeof(int), cudaMemcpyDeviceToHost);

  for(int i=0; i<4; i++){
    finalSum += h_output[i];
  }

  printf("Final Sum: %d\n", finalSum);

  cudaFree(d_input);
  cudaFree(d_output);

  return 0;
}


Overwriting 2.cu


In [None]:
!nvcc -o 2 2.cu

In [None]:
!./2

Final Sum: 13


## Assingment Q1


In [None]:
%%writefile 3.cu

#include <stdio.h>
#define N 160

__global__ void MaxMin(int *d_input, int *d_outputMax, int *d_outputMin, int n) {
  __shared__ int sharedDataMax[8];
  __shared__ int sharedDataMin[8];

  int id = blockIdx.x * blockDim.x + threadIdx.x;
  int tid = threadIdx.x;

  if (id < n) {
    sharedDataMax[tid] = d_input[id];
    sharedDataMin[tid] = d_input[id];
  }

  __syncthreads();

  for (int i = blockDim.x / 2; i > 0; i >>= 1) {
    if (tid < i) {
      sharedDataMax[tid] = max(sharedDataMax[tid], sharedDataMax[tid + i]);
      sharedDataMin[tid] = min(sharedDataMin[tid], sharedDataMin[tid + i]);
    }
    __syncthreads();
  }

  if (tid == 0) {
    d_outputMax[blockIdx.x] = sharedDataMax[0];
    d_outputMin[blockIdx.x] = sharedDataMin[0];
  }
}

int main() {
  int h_input[N], h_outputMax[N / 8], h_outputMin[N / 8];
  int *d_input, *d_outputMax, *d_outputMin;
  int finalMax =0, finalMin = 0;

  for (int i = 0; i < N; i++) {
    h_input[i] = i + 1;  // Example data: 1 to 160
  }

  cudaMalloc((void **)&d_input, N * sizeof(int));
  cudaMalloc((void **)&d_outputMax, (N / 8) * sizeof(int));
  cudaMalloc((void **)&d_outputMin, (N / 8) * sizeof(int));

  cudaMemcpy(d_input, h_input, N * sizeof(int), cudaMemcpyHostToDevice);

  MaxMin<<<20, 8>>>(d_input, d_outputMax, d_outputMin, N);

  cudaMemcpy(h_outputMax, d_outputMax, 20 * sizeof(int), cudaMemcpyDeviceToHost);
  cudaMemcpy(h_outputMin, d_outputMin, 20 * sizeof(int), cudaMemcpyDeviceToHost);

  for (int i = 0; i < 20; i++) {
    printf("Max of block [%d] is %d\n", i, h_outputMax[i]);
    printf("Min of block [%d] is %d\n", i, h_outputMin[i]);
  }
  printf("\n");

  for(int i=0 ; i<20; i++){

    if(h_outputMax[i] > finalMax){
      finalMax = h_outputMax[i];
    }
    if(h_outputMin[i] < finalMin){
      finalMin = h_outputMin[i];
    }
  }
  printf("Max of all blocks is %d\n", finalMax);
  printf("Min of all blocks is %d\n", finalMin);

  cudaFree(d_input);
  cudaFree(d_outputMax);
  cudaFree(d_outputMin);

  return 0;
}


Overwriting 4.cu


In [None]:
!nvcc -o 3 3.cu

In [None]:
!./3

Max of block [0] is 8
Min of block [0] is 1
Max of block [1] is 16
Min of block [1] is 9
Max of block [2] is 24
Min of block [2] is 17
Max of block [3] is 32
Min of block [3] is 25
Max of block [4] is 40
Min of block [4] is 33
Max of block [5] is 48
Min of block [5] is 41
Max of block [6] is 56
Min of block [6] is 49
Max of block [7] is 64
Min of block [7] is 57
Max of block [8] is 72
Min of block [8] is 65
Max of block [9] is 80
Min of block [9] is 73
Max of block [10] is 88
Min of block [10] is 81
Max of block [11] is 96
Min of block [11] is 89
Max of block [12] is 104
Min of block [12] is 97
Max of block [13] is 112
Min of block [13] is 105
Max of block [14] is 120
Min of block [14] is 113
Max of block [15] is 128
Min of block [15] is 121
Max of block [16] is 136
Min of block [16] is 129
Max of block [17] is 144
Min of block [17] is 137
Max of block [18] is 152
Min of block [18] is 145
Max of block [19] is 160
Min of block [19] is 153

Max of all blocks is 160
Min of all blocks is 0

## Que2

In [None]:
%%writefile 4.cu   ///Re-Run This Program

#include <stdio.h>
#include <cuda_runtime.h>

#define N 160  // Total numbers from 0 to 159
#define BLOCK_SIZE 16  // Number of threads per block

__global__ void countEvenOdd(int *d_data, int *d_even_count, int *d_odd_count) {
    extern __shared__ int shared_counts[];  // Shared memory to store even and odd counts per block

    int idx = blockIdx.x * blockDim.x + threadIdx.x;  // Global index
    int tid = threadIdx.x;

    // Initialize shared memory for even and odd counts for this block
    if (tid == 0) {
        shared_counts[0] = 0;  // even count
        shared_counts[1] = 0;  // odd count
    }

    __syncthreads();  // Synchronize threads in the block

    // Each thread checks if its corresponding number is even or odd
    if (idx < N) {
        if (d_data[idx] % 2 == 0) {
            atomicAdd(&shared_counts[0], 1);  // Increment even count atomically
        } else {
            atomicAdd(&shared_counts[1], 1);  // Increment odd count atomically
        }
    }

    __syncthreads();  // Synchronize threads to ensure the counts are updated

    // First thread in each block writes the block's even and odd counts to global memory
    if (tid == 0) {
        atomicAdd(d_even_count, shared_counts[0]);
        atomicAdd(d_odd_count, shared_counts[1]);
    }
}

int main() {
    int h_data[N], h_even_count = 0, h_odd_count = 0;
    int *d_data, *d_even_count, *d_odd_count;

    // Initialize the data array (0 to 159)
    for (int i = 0; i < N; i++) {
        h_data[i] = i;  // Numbers 0 to 159
    }

    // Allocate memory on the device
    cudaMalloc((void **)&d_data, N * sizeof(int));
    cudaMalloc((void **)&d_even_count, sizeof(int));
    cudaMalloc((void **)&d_odd_count, sizeof(int));

    // Initialize the count variables to 0
    cudaMemcpy(d_even_count, &h_even_count, sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_odd_count, &h_odd_count, sizeof(int), cudaMemcpyHostToDevice);

    // Copy data from host to device
    cudaMemcpy(d_data, h_data, N * sizeof(int), cudaMemcpyHostToDevice);

    // Launch the kernel with N / BLOCK_SIZE blocks, and BLOCK_SIZE threads per block
    int sharedMemSize = 2 * sizeof(int);  // Shared memory size for even and odd counts (2 integers)
    countEvenOdd<<<N / BLOCK_SIZE, BLOCK_SIZE, sharedMemSize>>>(d_data, d_even_count, d_odd_count);

    // Wait for the kernel to finish
    cudaDeviceSynchronize();

    // Copy the final counts back to the host
    cudaMemcpy(&h_even_count, d_even_count, sizeof(int), cudaMemcpyDeviceToHost);
    cudaMemcpy(&h_odd_count, d_odd_count, sizeof(int), cudaMemcpyDeviceToHost);

    // Print the results
    printf("Even Count: %d\n", h_even_count);
    printf("Odd Count: %d\n", h_odd_count);

    // Free device memory
    cudaFree(d_data);
    cudaFree(d_even_count);
    cudaFree(d_odd_count);

    return 0;
}


Writing 5.cu


In [None]:
!nvcc -o 4 4.cu

[01m[K5.cu:[m[K In function ‘[01m[Kint main()[m[K’:
   52 |     pri[01;35m[Kntf("Odd Sum of block [%d] is %[m[Kd\n",[32m[K i, h_OddOu[m[Ktput);
      |        [01;35m[K^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~[m[K     [32m[K~~~~~~~~~~~[m[K
      |                                            [32m[K|[m[K
      |                                            [32m[Kint*[m[K


In [None]:
!./4

Even Sum of block [0] is 156
Odd Sum of block [0] is 689677664
Even Sum of block [1] is 156
Odd Sum of block [1] is 689677664
Even Sum of block [2] is 156
Odd Sum of block [2] is 689677664
Even Sum of block [3] is 156
Odd Sum of block [3] is 689677664
Even Sum of block [4] is 156
Odd Sum of block [4] is 689677664
Even Sum of block [5] is 156
Odd Sum of block [5] is 689677664
Even Sum of block [6] is 156
Odd Sum of block [6] is 689677664
Even Sum of block [7] is 156
Odd Sum of block [7] is 689677664
Even Sum of block [8] is 156
Odd Sum of block [8] is 689677664
Even Sum of block [9] is 156
Odd Sum of block [9] is 689677664
Even Sum of block [10] is 156
Odd Sum of block [10] is 689677664
Even Sum of block [11] is 156
Odd Sum of block [11] is 689677664
Even Sum of block [12] is 156
Odd Sum of block [12] is 689677664
Even Sum of block [13] is 156
Odd Sum of block [13] is 689677664
Even Sum of block [14] is 156
Odd Sum of block [14] is 689677664
Even Sum of block [15] is 156
Odd Sum of bloc