<a href="https://colab.research.google.com/github/ArifIkbal140/Parallel_Processing/blob/main/cuda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
%%writefile search_phonebook.cu
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <cstring>
#include <cuda_runtime.h>

using namespace std;

// ================================
// Contact struct
// ================================
struct Contact {
    char name[50];
    char number[50];
};

// ================================
// Device function: substring check
// ================================
__device__ bool isSubstring(const char* text, const char* pattern) {
    for (int i = 0; text[i] != '\0'; i++) {
        int j = 0;
        while (text[i+j] != '\0' && pattern[j] != '\0' && text[i+j] == pattern[j]) {
            j++;
        }
        if (pattern[j] == '\0') return true;
    }
    return false;
}

// ================================
// Kernel: search phonebook (name OR number)
// ================================
__global__ void searchPhonebook(Contact* d_contacts, int num_contacts,
                                char* d_pattern, int* d_results) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= num_contacts) return;

    if (isSubstring(d_contacts[idx].name, d_pattern) ||
        isSubstring(d_contacts[idx].number, d_pattern)) {
        d_results[idx] = 1; // match
    } else {
        d_results[idx] = 0;
    }
}

// ================================
// Main
// ================================
int main(int argc, char* argv[]) {

    if (argc != 3) {
        cout << "Usage: ./search_phonebook <search_pattern> <threads>\n";
        return 0;
    }

    string search_pattern = argv[1];
    int threads = atoi(argv[2]);

    // ================================
    // Read phonebook file
    // ================================
    string file_name = "phonebook1.txt"; // change path if needed
    ifstream file(file_name);
    if (!file) {
        cerr << "Error opening file: " << file_name << endl;
        return 1;
    }

    vector<Contact> contacts;
    string line;
    while (getline(file, line)) {
        if (line.empty()) continue;

        Contact c;
        int pos = line.find(",");
        if (pos != string::npos) {
            // Remove quotes
            string name = line.substr(1, pos - 2);
            string number = line.substr(pos + 2, line.size() - pos - 3);

            strncpy(c.name, name.c_str(), sizeof(c.name)-1);
            c.name[sizeof(c.name)-1] = '\0';
            strncpy(c.number, number.c_str(), sizeof(c.number)-1);
            c.number[sizeof(c.number)-1] = '\0';

            contacts.push_back(c);
        }
    }
    file.close();

    int n = contacts.size();
    cout << "Total contacts: " << n << endl;

    // ================================
    // Allocate device memory
    // ================================
    Contact* d_contacts;
    char* d_pattern;
    int* d_results;

    cudaMalloc(&d_contacts, n * sizeof(Contact));
    cudaMalloc(&d_pattern, search_pattern.size() + 1);
    cudaMalloc(&d_results, n * sizeof(int));

    cudaMemcpy(d_contacts, contacts.data(), n * sizeof(Contact), cudaMemcpyHostToDevice);
    cudaMemcpy(d_pattern, search_pattern.c_str(), search_pattern.size() + 1, cudaMemcpyHostToDevice);

    // ================================
    // Launch kernel
    // ================================
    int blocks = (n + threads - 1) / threads;
    searchPhonebook<<<blocks, threads>>>(d_contacts, n, d_pattern, d_results);

    // Error check
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        cerr << "CUDA Error: " << cudaGetErrorString(err) << endl;
        return 1;
    }

    cudaDeviceSynchronize();

    // ================================
    // Copy results back
    // ================================
    int* h_results = (int*)malloc(n * sizeof(int));
    cudaMemcpy(h_results, d_results, n * sizeof(int), cudaMemcpyDeviceToHost);

    // ================================
    // Print matches
    // ================================
    cout << "Matched contacts:\n";
    for (int i = 0; i < n; i++) {
        if (h_results[i]) {
            cout << contacts[i].name << " , " << contacts[i].number << endl;
        }
    }

    // ================================
    // Cleanup
    // ================================
    cudaFree(d_contacts);
    cudaFree(d_pattern);
    cudaFree(d_results);
    free(h_results);

    return 0;
}




Overwriting search_phonebook.cu


In [5]:
!nvcc -arch=sm_75 search_phonebook.cu -o search_phonebook

In [6]:
!time ./search_phonebook AKTER 100 > output1.txt


real	0m0.308s
user	0m0.021s
sys	0m0.221s


In [7]:


%%writefile matrix_array_mul.cu

#include <bits/stdc++.h>
#include <cuda.h>
#include <cuda_runtime.h>

using namespace std;

// CUDA Kernel: C[k][i][j] = sum over l (A[k][i][l] * B[k][l][j])
__global__ void matrixArrayMultiply(
    int *A, int *B, int *C,
    int k, int m, int n, int p)
{
    int mat = blockIdx.z;
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;

    if (mat < k && row < m && col < p) {
        int sum = 0;
        for (int l = 0; l < n; l++) {
            sum += A[mat * m * n + row * n + l] *
                   B[mat * n * p + l * p + col];
        }
        C[mat * m * p + row * p + col] = sum;
    }
}

int main() {
    // Dimensions
    const int K = 4;
    const int M = 4;
    const int N = 4;
    const int P = 4;

    const int sizeA = K * M * N;
    const int sizeB = K * N * P;
    const int sizeC = K * M * P;

    vector<int> h_A(sizeA), h_B(sizeB), h_C(sizeC);

    srand(42);
    for (int i = 0; i < sizeA; i++) h_A[i] = rand() % 10;
    for (int i = 0; i < sizeB; i++) h_B[i] = rand() % 10;

    int *d_A, *d_B, *d_C;
    cudaMalloc(&d_A, sizeA * sizeof(int));
    cudaMalloc(&d_B, sizeB * sizeof(int));
    cudaMalloc(&d_C, sizeC * sizeof(int));

    cudaMemcpy(d_A, h_A.data(), sizeA * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_B, h_B.data(), sizeB * sizeof(int), cudaMemcpyHostToDevice);

    dim3 block(16, 16);
    dim3 grid(
        (P + block.x - 1) / block.x,
        (M + block.y - 1) / block.y,
        K
    );

    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    cudaEventRecord(start);
    matrixArrayMultiply<<<grid, block>>>(d_A, d_B, d_C, K, M, N, P);
    cudaDeviceSynchronize();
    cudaEventRecord(stop);
    cudaEventSynchronize(stop);

    float ms;
    cudaEventElapsedTime(&ms, start, stop);

    cudaMemcpy(h_C.data(), d_C, sizeC * sizeof(int), cudaMemcpyDeviceToHost);

    cout << "\nExecution Time: " << ms << " ms\n";

    // Print all 4 matrix sets
    for (int mat = 0; mat < K; mat++) {
        cout << "\n============================\n";
        cout << "Matrix Set " << mat << "\n";
        cout << "============================\n";

        cout << "\nMatrix A[" << mat << "]:\n";
        for (int i = 0; i < M; i++) {
            for (int j = 0; j < N; j++) {
                cout << h_A[mat * M * N + i * N + j] << " ";
            }
            cout << endl;
        }

        cout << "\nMatrix B[" << mat << "]:\n";
        for (int i = 0; i < N; i++) {
            for (int j = 0; j < P; j++) {
                cout << h_B[mat * N * P + i * P + j] << " ";
            }
            cout << endl;
        }

        cout << "\nMatrix C[" << mat << "] (Result):\n";
        for (int i = 0; i < M; i++) {
            for (int j = 0; j < P; j++) {
                cout << h_C[mat * M * P + i * P + j] << " ";
            }
            cout << endl;
        }
    }

    cudaFree(d_A);
    cudaFree(d_B);
    cudaFree(d_C);
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    return 0;
}

Writing matrix_array_mul.cu


In [8]:
!nvcc matrix_array_mul.cu -o matrix_array_mul -arch=sm_75

In [9]:
!./matrix_array_mul


Execution Time: 0.125344 ms

Matrix Set 0

Matrix A[0]:
6 0 1 1 
2 8 1 0 
5 3 4 3 
7 4 6 2 

Matrix B[0]:
5 7 0 0 
0 3 0 5 
2 7 2 3 
8 1 9 0 

Matrix C[0] (Result):
40 50 11 3 
12 45 2 43 
57 75 35 27 
63 105 30 38 

Matrix Set 1

Matrix A[1]:
2 8 8 9 
7 2 9 3 
7 9 9 4 
1 6 3 7 

Matrix B[1]:
8 3 1 0 
9 5 9 4 
2 5 2 4 
6 9 6 1 

Matrix C[1] (Result):
158 167 144 73 
110 103 61 47 
179 147 130 76 
110 111 103 43 

Matrix Set 2

Matrix A[2]:
6 4 0 0 
4 1 1 0 
5 7 5 4 
1 1 7 3 

Matrix B[2]:
6 8 1 6 
4 3 4 6 
0 8 0 1 
9 9 3 9 

Matrix C[2] (Result):
52 60 22 60 
28 43 8 31 
94 137 45 113 
37 94 14 46 

Matrix Set 3

Matrix A[3]:
0 7 4 9 
0 3 4 9 
5 5 3 8 
1 9 5 0 

Matrix B[3]:
4 5 0 3 
2 1 9 5 
8 1 1 4 
2 9 6 0 

Matrix C[3] (Result):
64 92 121 51 
56 88 85 31 
70 105 96 52 
62 19 86 68 


In [None]:

%%writefile search_phonebook_single_list.cu
#include <bits/stdc++.h>
#include <cuda.h>
#include <cuda_runtime.h>


using namespace std;


#define MAX_STR_LEN 50


// Struct to hold one contact (name + number)
struct Contact {
    string name;
    string number;
};


// For sorting results
struct ResultContact {
    string name;
    string number;


    bool operator<(const ResultContact& other) const {
        return name < other.name;
    }
};


// Device substring check
__device__ bool check(const char* str1, const char* str2, int len) {
    for (int i = 0; str1[i] != '\0'; ++i) {
        int j = 0;
        while (str1[i + j] != '\0' && j < len && str1[i + j] == str2[j]) {
            ++j;
        }
        if (j == len) {
            return true;
        }
    }
    return false;
}


// CUDA kernel - same as before
__global__ void searchPhonebook(
    char* d_names,
    int num_contacts,
    char* search_name,
    int search_len,
    int* d_results
) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;


    if (idx < num_contacts) {
        char* current_name = d_names + idx * MAX_STR_LEN;
        d_results[idx] = check(current_name, search_name, search_len) ? 1 : 0;
    }
}


int main(int argc, char* argv[]) {


    if (argc != 3) {
        cerr << "Usage: " << argv[0] << " <search_string> <threads_per_block>\n";
        return 1;
    }


    string search_string = argv[1];
    int threads_per_block = atoi(argv[2]);


    string file_name = "/content/phonebook1.txt";


    // Single vector for all contacts
    vector<Contact> contacts;


    ifstream file(file_name);
    if (!file.is_open()) {
        cerr << "Error opening file: " << file_name << endl;
        return 1;
    }


    string line;
    while (getline(file, line)) {
        if (line.empty()) continue;


        // Find the comma that separates name and number
        size_t pos = line.find("\",\"");
        if (pos == string::npos) continue;


        // Extract name (remove first and last quote)
        string name = line.substr(1, pos - 1);          // from after first " to before comma
        string number = line.substr(pos + 3, line.size() - pos - 4); // after "," to before last "


        contacts.push_back({name, number});
    }
    file.close();


    int num_contacts = contacts.size();
    if (num_contacts == 0) {
        cerr << "No contacts found.\n";
        return 1;
    }


    // Host memory for names only (for GPU search)
    char* h_names = (char*)malloc(num_contacts * MAX_STR_LEN);
    int* h_results = (int*)malloc(num_contacts * sizeof(int));


    // Copy only names to flat array for GPU
    for (int i = 0; i < num_contacts; ++i) {
        strncpy(h_names + i * MAX_STR_LEN,
                contacts[i].name.c_str(),
                MAX_STR_LEN - 1);
        h_names[i * MAX_STR_LEN + MAX_STR_LEN - 1] = '\0';
    }


    // Device memory
    char *d_names, *d_search_name;
    int* d_results;


    int search_len = search_string.length();


    cudaMalloc(&d_names, num_contacts * MAX_STR_LEN);
    cudaMalloc(&d_results, num_contacts * sizeof(int));
    cudaMalloc(&d_search_name, search_len + 1);


    cudaMemcpy(d_names, h_names,
               num_contacts * MAX_STR_LEN,
               cudaMemcpyHostToDevice);


    cudaMemcpy(d_search_name, search_string.c_str(),
               search_len + 1,
               cudaMemcpyHostToDevice);


    // Launch kernel
    int blocks = (num_contacts + threads_per_block - 1) / threads_per_block;
    searchPhonebook<<<blocks, threads_per_block>>>(
        d_names, num_contacts, d_search_name, search_len, d_results
    );


    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        cerr << "CUDA kernel error: " << cudaGetErrorString(err) << endl;
        return 1;
    }


    cudaDeviceSynchronize();


    // Get results back
    cudaMemcpy(h_results, d_results,
               num_contacts * sizeof(int),
               cudaMemcpyDeviceToHost);


    // Collect matching contacts
    vector<ResultContact> matched_contacts;
    for (int i = 0; i < num_contacts; ++i) {
        if (h_results[i] == 1) {
            matched_contacts.push_back({
                contacts[i].name,
                contacts[i].number
            });
        }
    }


    // Sort by name
    sort(matched_contacts.begin(), matched_contacts.end());


    // Print results
    cout << "\nSearch Results (Ascending Order):\n";
    if (matched_contacts.empty()) {
        cout << "No matches found.\n";
    } else {
        for (const auto& c : matched_contacts) {
            cout << c.name << " " << c.number << endl;
        }
    }


    // Cleanup
    free(h_names);
    free(h_results);
    cudaFree(d_names);
    cudaFree(d_results);
    cudaFree(d_search_name);


    return 0;
}
/**
!nvcc -arch=compute_75 -code=sm_75 search_phonebook_single_list.cu -o search_phonebook
!time ./search_phonebook TANBIR 256

*/

Overwriting search_phonebook_single_list.cu


In [None]:
!nvcc -arch=compute_75 -code=sm_75 search_phonebook_single_list.cu -o search_phonebook


In [None]:
!time ./search_phonebook AKTER 256



Search Results (Ascending Order):
SAZNIN AKTER ZITU 016 16 217"
SUMAIYA AKTER SWEETY 018 07 741"
SUNJIDA AKTER NIPA 012 20 350"

real	0m0.177s
user	0m0.019s
sys	0m0.132s


In [10]:




%%writefile search_any_part_sorted.cu
#include <bits/stdc++.h>
#include <cuda.h>
#include <cuda_runtime.h>
using namespace std;

#define MAX_LINE_LEN 300

__device__ bool has_substring(const char* text, const char* pat, int pat_len) {
    if (pat_len == 0) return true;
    for (int i = 0; text[i] != '\0'; ++i) {
        int j = 0;
        while (text[i + j] != '\0' && j < pat_len && text[i + j] == pat[j]) ++j;
        if (j == pat_len) return true;
    }
    return false;
}

__global__ void searchKernel(
    char* d_lines,
    int num_lines,
    char* d_pat,
    int pat_len,
    int* d_match
) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= num_lines) return;
    char* line = d_lines + idx * MAX_LINE_LEN;
    d_match[idx] = has_substring(line, d_pat, pat_len) ? 1 : 0;
}

// Comparator: index অনুযায়ী sort করার জন্য
struct LineWithIndex {
    string line;
    int index;
};

bool cmp(const LineWithIndex& a, const LineWithIndex& b) {
    return a.index < b.index;
}

int main(int argc, char** argv) {
    if (argc != 3) {
        cerr << "Usage: " << argv[0] << " <search_term> <threads_per_block>\n";
        return 1;
    }

    string search_term = argv[1];
    int threads = atoi(argv[2]);

    string fname = "/content/phonebook1.txt";
    vector<string> original_lines;

    ifstream file(fname);
    if (!file) {
        cerr << "File not found: " << fname << endl;
        return 1;
    }

    string ln;
    while (getline(file, ln)) {
        if (!ln.empty()) original_lines.push_back(ln);
    }
    file.close();

    int n = original_lines.size();
    if (n == 0) {
        cerr << "No lines in file\n";
        return 1;
    }

    cout << "Loaded " << n << " lines.\n";

    char* h_lines = (char*)malloc(n * MAX_LINE_LEN);
    for (int i = 0; i < n; ++i) {
        strncpy(h_lines + i * MAX_LINE_LEN, original_lines[i].c_str(), MAX_LINE_LEN - 1);
        h_lines[i * MAX_LINE_LEN + MAX_LINE_LEN - 1] = '\0';
    }

    char *d_lines, *d_pat;
    int* d_match;

    int pat_len = search_term.length();

    cudaMalloc(&d_lines, n * MAX_LINE_LEN);
    cudaMalloc(&d_match, n * sizeof(int));
    cudaMalloc(&d_pat, pat_len + 1);

    cudaMemcpy(d_lines, h_lines, n * MAX_LINE_LEN, cudaMemcpyHostToDevice);
    cudaMemcpy(d_pat, search_term.c_str(), pat_len + 1, cudaMemcpyHostToDevice);

    int blocks = (n + threads - 1) / threads;
    searchKernel<<<blocks, threads>>>(d_lines, n, d_pat, pat_len, d_match);

    cudaDeviceSynchronize();

    vector<int> match(n);
    cudaMemcpy(match.data(), d_match, n * sizeof(int), cudaMemcpyDeviceToHost);

    // Collect results with their original index
    vector<LineWithIndex> results;
    for (int i = 0; i < n; ++i) {
        if (match[i]) {
            results.push_back({original_lines[i], i + 1});  // 1-based index
        }
    }

    // Sort by index (numerical order)
    sort(results.begin(), results.end(), cmp);

    // Print
    cout << "\nResults for \"" << search_term << "\":\n";
    if (results.empty()) {
        cout << "No matches found.\n";
    } else {
        cout << "Found " << results.size() << " lines (sorted by index):\n\n";
        for (const auto& r : results) {
            cout << r.line << endl;
        }
    }

    free(h_lines);
    cudaFree(d_lines);
    cudaFree(d_pat);
    cudaFree(d_match);

    return 0;
}

/**
!nvcc -arch=compute_75 -code=sm_75 search_any_part_sorted.cu -o search_any_sorted
!time ./search_any_sorted TANBIR 256
*/


Writing search_any_part_sorted.cu


In [11]:
!nvcc -arch=compute_75 -code=sm_75 search_any_part_sorted.cu -o search_any_sorted


In [12]:
!time ./search_any_sorted "AKTER" 256


Loaded 1001 lines.

Results for "AKTER":
Found 171 lines (sorted by index):

"SAZNIN AKTER ZITU","016 16 217"
"SUMAIYA AKTER SWEETY","018 07 741"
"SUNJIDA AKTER NIPA","012 20 350"
"SUMAIA AKTER TISHA","011 77 602"
"FARJANA AKTER POPY","014 27 168"
"MOSAMMAD SHARMIN AKTER","015 10 657"
"MOSS. SANTA AKTER","013 22 453"
"FARJANA AKTER","017 62 062"
"SADIA AKTER TANIA","017 27 203"
"MOHIMA AKTER SATHI","016 67 817"
"SALMA AKTER","017 25 440"
"RIMA AKTER","017 61 410"
"HABIBA AKTER","015 68 285"
"SUMAIA AKTER","014 20 434"
"KHADIJA AKTER","014 42 867"
"RANU AKTER","011 50 385"
"LAKY AKTER","011 64 153"
"SHAMIMA AKTER MOYNA","016 57 361"
"ROHIMA AKTER RITU","018 06 242"
"MARZIA AKTER","016 08 676"
"ZAFRIN AKTER","013 23 148"
"RESMA AKTER","015 53 362"
"MAHMUDA AKTER","011 77 685"
"MOSA. MITU AKTER","012 47 378"
"MST. YESMIN AKTER IMA","018 17 622"
"HUMIRA AKTER","014 54 775"
"SADIA AKTER","015 48 225"
"SABIHA AKTER","015 86 778"
"FATEMA AKTER EFA","017 20 204"
"N