<a href="https://colab.research.google.com/github/Mohon127/Parallel_Processing_using_CUDA/blob/main/sub_str_cuda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%writefile matrix.cu
#include <iostream>
#include <cuda_runtime.h>
#include <chrono>
using namespace std;

// Batched matrix multiply: R[k] = A[k] * B[k], one whole matrix per thread.
//
// A, B and R are device pointers to matrices packed back to back in row-major
// order (A[k] is M x N, B[k] is N x P, R[k] is M x P).
//
// Launch layout: 1-D. Each thread handles matrix
//   k = batchOffset + blockIdx.x * blockDim.x + threadIdx.x.
// The kernel receives no matrix count, so it cannot bounds-check k itself:
// the caller must launch exactly as many threads as there are matrices in the
// current batch.
__global__ void matrixMul(float *A, float *B, float *R, int M, int N, int P, int batchOffset) {
    // The earlier guard compared k with gridDim.x * blockDim.x, i.e. with the
    // launch size rather than the matrix count, so every thread of any batch
    // with batchOffset > 0 returned without computing anything.
    const size_t k = (size_t)batchOffset + (size_t)blockIdx.x * blockDim.x + threadIdx.x;

    // size_t offsets keep k * M * N from overflowing int for large batches.
    const float *a = A + k * M * N;
    const float *b = B + k * N * P;
    float *r = R + k * M * P;

    // Classic triple loop; the dot product is accumulated in a register and
    // written to global memory once per output element.
    for (int i = 0; i < M; i++) {
        for (int l = 0; l < P; l++) {
            float acc = 0.0f;
            for (int j = 0; j < N; j++) {
                acc += a[i * N + j] * b[j * P + l];
            }
            r[i * P + l] = acc;
        }
    }
}

// Prints the matrix stored at position `index` of the packed host array A.
// Each matrix has M rows and N columns in row-major order; every value is
// followed by a single space and every row ends with a newline.
void printMatrixAtIndex(float *A, int index, int M, int N) {
    const float *matrix = A + index * M * N;
    for (int row = 0; row < M; ++row) {
        const float *values = matrix + row * N;
        for (int col = 0; col < N; ++col) {
            cout << values[col] << " ";
        }
        cout << endl;
    }
}

// Entry point: ./matrix <threads> <k> <m> <n> <p>
//
// Builds K random (M x N) and (N x P) matrices on the host, multiplies each
// pair on the GPU with matrixMul in batches of at most <threads> threads (one
// matrix per thread), copies the products back, and prints A[9], B[9] and
// C[9] when K > 9. Returns 1 on bad arguments or allocation failure; any CUDA
// failure terminates the program with a message.
int main(int argc, char* argv[]) {
    if (argc < 6) {
        cout << "Usage: ./matrix <threads> <k> <m> <n> <p>" << endl;
        return 1;
    }

    int threads = atoi(argv[1]); // threads per block
    int K = atoi(argv[2]);
    int M = atoi(argv[3]);
    int N = atoi(argv[4]);
    int P = atoi(argv[5]);

    // atoi yields 0 for non-numeric text. A non-positive thread count would
    // make the batching loop below spin forever, and non-positive dimensions
    // give meaningless buffer sizes.
    if (threads <= 0 || K <= 0 || M <= 0 || N <= 0 || P <= 0) {
        cerr << "Error: <threads>, <k>, <m>, <n> and <p> must all be positive integers." << endl;
        return 1;
    }

    // Stop with a readable message as soon as any CUDA call fails instead of
    // silently printing whatever happens to be in the result buffer.
    auto cudaOk = [](cudaError_t status, const char *what) {
        if (status != cudaSuccess) {
            cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << endl;
            exit(1);
        }
    };

    // A block cannot hold more threads than the device allows; clamp the
    // request rather than letting every launch fail.
    int device = 0;
    int maxThreads = 0;
    cudaOk(cudaGetDevice(&device), "get current device");
    cudaOk(cudaDeviceGetAttribute(&maxThreads, cudaDevAttrMaxThreadsPerBlock, device),
           "query max threads per block");
    threads = min(threads, maxThreads);

    // Element counts in size_t: K * M * N can exceed the range of int.
    size_t sizeA = (size_t)K * M * N;
    size_t sizeB = (size_t)K * N * P;
    size_t sizeR = (size_t)K * M * P;

    // Host memory
    float *h_A = (float*)malloc(sizeA * sizeof(float));
    float *h_B = (float*)malloc(sizeB * sizeof(float));
    float *h_R = (float*)malloc(sizeR * sizeof(float));
    if (!h_A || !h_B || !h_R) {
        cerr << "Error: host memory allocation failed." << endl;
        free(h_A); free(h_B); free(h_R);
        return 1;
    }

    // Initialize random matrices with values 0..9
    for (size_t i = 0; i < sizeA; i++) h_A[i] = rand() % 10;
    for (size_t i = 0; i < sizeB; i++) h_B[i] = rand() % 10;

    // Device memory
    float *d_A = nullptr, *d_B = nullptr, *d_R = nullptr;
    cudaOk(cudaMalloc(&d_A, sizeA * sizeof(float)), "allocate d_A");
    cudaOk(cudaMalloc(&d_B, sizeB * sizeof(float)), "allocate d_B");
    cudaOk(cudaMalloc(&d_R, sizeR * sizeof(float)), "allocate d_R");

    cudaOk(cudaMemcpy(d_A, h_A, sizeA * sizeof(float), cudaMemcpyHostToDevice), "copy A to device");
    cudaOk(cudaMemcpy(d_B, h_B, sizeB * sizeof(float), cudaMemcpyHostToDevice), "copy B to device");
    cudaOk(cudaMemset(d_R, 0, sizeR * sizeof(float)), "clear d_R");

    // Process the K matrices in batches of at most `threads`, one single-block
    // launch per batch. Launches on the same stream run in order, so one
    // synchronization after the loop is enough.
    int remaining = K;
    int batchOffset = 0;
    while (remaining > 0) {
        int currentBatchSize = min(remaining, threads);
        matrixMul<<<1, currentBatchSize>>>(d_A, d_B, d_R, M, N, P, batchOffset);
        cudaOk(cudaGetLastError(), "launch matrixMul");
        remaining -= currentBatchSize;
        batchOffset += currentBatchSize;
    }
    cudaOk(cudaDeviceSynchronize(), "run matrixMul");

    // Copy result back
    cudaOk(cudaMemcpy(h_R, d_R, sizeR * sizeof(float), cudaMemcpyDeviceToHost), "copy R to host");

    // Output
    if (K > 9) {
        cout << "Matrix A[9]:" << endl;
        printMatrixAtIndex(h_A, 9, M, N);

        cout << "Matrix B[9]:" << endl;
        printMatrixAtIndex(h_B, 9, N, P);

        cout << "Matrix C[9]:" << endl;
        printMatrixAtIndex(h_R, 9, M, P);
    } else {
        cout << "Error: K <= 9, so A[9], B[9], C[9] do not exist." << endl;
    }

    // Cleanup
    cudaFree(d_A); cudaFree(d_B); cudaFree(d_R);
    free(h_A); free(h_B); free(h_R);
    return 0;
}


In [1]:
%%writefile phonebook_search.cu
/*
Plain substring search on phone numbers; results are printed unsorted.
*/

#include <bits/stdc++.h>
using namespace std;
#include <cuda.h>

// One phone-book record stored as two fixed-size character buffers, so the
// whole vector of records can be copied to the device in one block.
struct Contact {
    char name[64 + 1];          // 65-byte buffer holding the contact name
    char phone_number[64 + 1];  // 65-byte buffer holding the phone number
};


// Reads the next double-quoted field from `file` and returns its contents
// without the quotes. Characters before the opening quote are discarded.
// If the stream ends before the closing quote, whatever was collected so far
// is returned; if no opening quote is found the result is empty.
string getInput(ifstream& file){
    string field;
    bool insideQuotes = false;
    for(char ch; file.get(ch); ){
        if(ch != '\"'){
            if(insideQuotes) field.push_back(ch);
            continue;
        }
        if(insideQuotes) return field;  // closing quote: field complete
        insideQuotes = true;            // opening quote: start collecting
    }
    return field;
}

// Device-side substring test: returns true when the NUL-terminated string
// str2 occurs somewhere inside the NUL-terminated string str1.
// An empty str2 matches any non-empty str1; an empty str1 never matches.
__device__ bool check(char* str1, char* str2){
    for(char* start = str1; *start != '\0'; ++start){
        int matched = 0;
        // Advance while the pattern continues and the characters agree; a NUL
        // in str1 differs from any pattern character and stops the scan.
        while(str2[matched] != '\0' && start[matched] == str2[matched]){
            ++matched;
        }
        if(str2[matched] == '\0') return true;  // whole pattern consumed
    }
    return false;
}


// One thread per contact: thread t inspects phoneBook[t + offset] and prints
// the record when its phone number contains the pattern `pat`.
// There is no bounds check, so the caller must launch no more threads than
// there are records at and after `offset`.
__global__ void myKernel(Contact* phoneBook, char* pat, int offset){
    const int idx = offset + threadIdx.x;
    Contact* entry = &phoneBook[idx];
    if(!check(entry->phone_number, pat)) return;
    printf("%s %s\n", entry->name, entry->phone_number);
}



// Entry point: <program> <search_term> <thread_limit>
//
// Loads contacts from the dataset file, copies them to the GPU and runs
// myKernel in batches of at most <thread_limit> threads; the kernel prints
// every contact whose phone number contains <search_term>. Returns 1 on bad
// arguments or an unreadable dataset; CUDA failures terminate the program
// with a message.
int main(int argc, char* argv[])
{
    // Both arguments are dereferenced below, so make sure they exist.
    if(argc < 3){
        cerr << "Usage: " << argv[0] << " <search_term> <thread_limit>" << endl;
        return 1;
    }

    int threadLimit = atoi(argv[2]);
    if(threadLimit <= 0){
        // A non-positive batch size would never shrink the remaining count.
        cerr << "Error: <thread_limit> must be a positive integer." << endl;
        return 1;
    }

    // Stop with a readable message as soon as any CUDA call fails.
    auto cudaOk = [](cudaError_t status, const char* what){
        if(status != cudaSuccess){
            cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << endl;
            exit(1);
        }
    };

    // A block cannot hold more threads than the device allows; clamp the
    // request rather than letting every launch fail.
    int device = 0;
    int maxThreads = 0;
    cudaOk(cudaGetDevice(&device), "get current device");
    cudaOk(cudaDeviceGetAttribute(&maxThreads, cudaDevAttrMaxThreadsPerBlock, device),
           "query max threads per block");
    threadLimit = min(threadLimit, maxThreads);

    ifstream myfile("/content/drive/MyDrive/Parallel_Dataset/labtest_dataset1.txt");
    if(!myfile){
        cerr << "Error: could not open the dataset file." << endl;
        return 1;
    }
    vector<Contact> phoneBook;

    int count = 0;

    while(myfile.peek() != EOF){

        if(count > 10000) break;
        count++;

        string name = getInput(myfile);
        string phoneNum = getInput(myfile);

        // Bounded copies: fields longer than 64 characters are truncated
        // instead of overflowing the 65-byte buffers.
        Contact c;
        strncpy(c.name, name.c_str(), 64);
        c.name[64] = '\0';
        strncpy(c.phone_number, phoneNum.c_str(), 64);
        c.phone_number[64] = '\0';

        phoneBook.push_back(c);
    }

    string search_name = argv[1];
    char pat[65];
    strncpy(pat, search_name.c_str(), 64);
    pat[64] = '\0';

    int n = phoneBook.size();
    if(n == 0) return 0;   // nothing to search

    char* d_pat = nullptr;
    cudaOk(cudaMalloc(&d_pat, sizeof(pat)), "allocate pattern");
    cudaOk(cudaMemcpy(d_pat, pat, sizeof(pat), cudaMemcpyHostToDevice), "copy pattern");

    Contact* d_phoneBook = nullptr;
    cudaOk(cudaMalloc(&d_phoneBook, n * sizeof(Contact)), "allocate phone book");
    cudaOk(cudaMemcpy(d_phoneBook, phoneBook.data(), n * sizeof(Contact), cudaMemcpyHostToDevice),
           "copy phone book");

    int remaining = n;
    int offset = 0;
    while(remaining > 0){
        int batchSize = min(threadLimit, remaining);
        myKernel<<<1,batchSize>>>(d_phoneBook, d_pat, offset);
        cudaOk(cudaGetLastError(), "launch myKernel");
        // Device-side printf output is buffered and flushed at synchronization
        // points, so sync after every batch to emit it before the next one.
        cudaOk(cudaDeviceSynchronize(), "run myKernel");

        remaining -= batchSize;
        offset += batchSize;
    }

    cudaFree(d_phoneBook);
    cudaFree(d_pat);
    return 0;
}

Writing phonebook_search.cu


In [2]:
!nvcc -arch=sm_75 phonebook_search.cu -o search_phonebook

/bin/bash: line 1: nvcc: command not found


In [None]:
!time ./search_phonebook '015' 1 > output.txt && sleep 2


real	0m0.511s
user	0m0.265s
sys	0m0.226s


In [None]:
!time ./search_phonebook '015 65 613' 2 > output.txt


real	0m0.313s
user	0m0.094s
sys	0m0.209s


In [None]:
!time ./search_phonebook '015 10' 10 > output.txt


real	0m0.256s
user	0m0.041s
sys	0m0.209s


In [None]:
!time ./search_phonebook 'NUSRAT' 50 > output.txt


real	0m0.239s
user	0m0.028s
sys	0m0.205s


In [None]:
%%writefile p2.cu
/*
Search by name; matches are sorted by name in ascending order.
*/

#include <bits/stdc++.h>
#include <cuda.h>
using namespace std;

// One phone-book record. Both fields are fixed 65-byte buffers: at most 64
// characters plus the terminating '\0'.
struct Contact {
    char name[64 + 1];
    char phone_number[64 + 1];
};

// Extracts the next double-quoted field from `file`, without the quotes.
// Text in front of the opening quote is skipped. Hitting end of file before
// the closing quote returns what has been read so far (possibly nothing).
string getInput(ifstream& file) {
    string field;
    char ch;

    // Phase 1: skip everything up to and including the opening quote.
    while(file.get(ch)) {
        if(ch == '\"') break;
    }

    // Phase 2: collect characters until the closing quote or end of file.
    while(file.get(ch)) {
        if(ch == '\"') break;
        field.push_back(ch);
    }
    return field;
}

// Device-side substring test: true when NUL-terminated str2 appears inside
// NUL-terminated str1. An empty str2 matches any non-empty str1; an empty
// str1 never matches.
__device__ bool check(char* str1, char* str2) {
    for(int start = 0; str1[start] != '\0'; ++start) {
        int k = 0;
        // Walk the pattern while it agrees with the text at this start position.
        while(str2[k] != '\0' && str1[start + k] == str2[k]) {
            ++k;
        }
        if(str2[k] == '\0') {
            return true;   // reached the end of the pattern without a mismatch
        }
    }
    return false;
}

// One thread per contact: thread t handles index t + offset and stores 1 in
// results[index] when the contact's name contains `pat`, otherwise 0.
// Threads whose index is at or beyond totalContacts do nothing.
__global__ void myKernel(Contact* phoneBook, char* pat, int offset, int totalContacts, int* results) {
    const int idx = offset + threadIdx.x;
    if(idx < totalContacts) {
        results[idx] = check(phoneBook[idx].name, pat) ? 1 : 0;
    }
}

// Entry point: <program> <search_term...> <thread_limit>
//
// Loads contacts from the dataset file, flags on the GPU every contact whose
// name contains the search term (myKernel, batches of at most <thread_limit>
// threads), then prints the matches sorted by name. Returns 1 on bad
// arguments or an unreadable dataset; CUDA failures terminate the program
// with a message.
int main(int argc, char* argv[]) {
    if(argc < 3) {
        cerr << "Usage: " << argv[0] << " <search_term> <thread_limit>" << endl;
        return 1;
    }

    // Stop with a readable message as soon as any CUDA call fails; a failed
    // launch would otherwise leave the result buffer uninitialised.
    auto cudaOk = [](cudaError_t status, const char* what) {
        if(status != cudaSuccess) {
            cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << endl;
            exit(1);
        }
    };

    int threadLimit = atoi(argv[argc-1]);
    if(threadLimit <= 0) {
        // A non-positive batch size would never shrink the remaining count.
        cerr << "Error: <thread_limit> must be a positive integer." << endl;
        return 1;
    }
    // A block cannot hold more threads than the device allows; clamp the
    // request rather than letting every launch fail.
    int device = 0;
    int maxThreads = 0;
    cudaOk(cudaGetDevice(&device), "get current device");
    cudaOk(cudaDeviceGetAttribute(&maxThreads, cudaDevAttrMaxThreadsPerBlock, device),
           "query max threads per block");
    threadLimit = min(threadLimit, maxThreads);

    ifstream myfile("/content/drive/MyDrive/Parallel_Dataset/labtest_dataset1.txt");
    if(!myfile) {
        cerr << "Error: could not open the dataset file." << endl;
        return 1;
    }
    vector<Contact> phoneBook;

    int count = 0;
    while(myfile.peek() != EOF) {
        if(count > 10000) break;
        count++;

        string name = getInput(myfile);
        string phoneNum = getInput(myfile);

        // Bounded copies: longer fields are truncated to 64 characters.
        Contact c;
        strncpy(c.name, name.c_str(), 64);
        c.name[64] = '\0';
        strncpy(c.phone_number, phoneNum.c_str(), 64);
        c.phone_number[64] = '\0';

        phoneBook.push_back(c);
    }

    // Concatenate search term (handles multi-word input)
    string search_name;
    for(int i = 1; i < argc - 1; i++) {
        if(i > 1) search_name += " ";
        search_name += argv[i];
    }

    char pat[65];
    strncpy(pat, search_name.c_str(), 64);
    pat[64] = '\0';

    int n = phoneBook.size();
    vector<int> h_results(n, 0);

    if(n > 0) {
        char* d_pat = nullptr;
        Contact* d_phoneBook = nullptr;
        int* d_results = nullptr;
        cudaOk(cudaMalloc(&d_pat, sizeof(pat)), "allocate pattern");
        cudaOk(cudaMalloc(&d_phoneBook, n * sizeof(Contact)), "allocate phone book");
        cudaOk(cudaMalloc(&d_results, n * sizeof(int)), "allocate results");
        cudaOk(cudaMemcpy(d_pat, pat, sizeof(pat), cudaMemcpyHostToDevice), "copy pattern");
        cudaOk(cudaMemcpy(d_phoneBook, phoneBook.data(), n * sizeof(Contact), cudaMemcpyHostToDevice),
               "copy phone book");

        // One single-block launch per batch. Launches on the same stream run
        // in order, so a single synchronization after the loop is enough.
        int remaining = n;
        int offset = 0;
        while(remaining > 0) {
            int batchSize = min(threadLimit, remaining);
            myKernel<<<1, batchSize>>>(d_phoneBook, d_pat, offset, n, d_results);
            cudaOk(cudaGetLastError(), "launch myKernel");

            remaining -= batchSize;
            offset += batchSize;
        }
        cudaOk(cudaDeviceSynchronize(), "run myKernel");

        cudaOk(cudaMemcpy(h_results.data(), d_results, n * sizeof(int), cudaMemcpyDeviceToHost),
               "copy results");

        cudaFree(d_results);
        cudaFree(d_phoneBook);
        cudaFree(d_pat);
    }

    // Collect matches
    vector<Contact> matched;
    for(int i = 0; i < n; i++) {
        if(h_results[i] == 1) {
            matched.push_back(phoneBook[i]);
        }
    }

    // Sort ascending by name
    sort(matched.begin(), matched.end(), [](const Contact& a, const Contact& b) {
        return string(a.name) < string(b.name);
    });

    // Print results
    cout << "Search Results (Ascending Order):" << endl;
    for(auto &c : matched) {
        cout << c.name << " " << c.phone_number << endl;
    }

    return 0;
}


Overwriting p2.cu


In [None]:
!nvcc -arch=sm_75 p2.cu -o p2

In [None]:
!time ./p2 'SUMIYA' 10 > o2.txt && sleep 2


real	0m0.261s
user	0m0.038s
sys	0m0.215s


In [None]:
%%writefile p3.cu
/*
Search by name, sort the matches by name, and also print each match's line number.
*/
#include <bits/stdc++.h>
#include <cuda.h>
using namespace std;

// One phone-book record. The text fields are fixed 65-byte buffers (at most
// 64 characters plus the terminating '\0').
struct Contact {
    char name[64 + 1];
    char phone_number[64 + 1];
    int line_number;   // 1-based position of the record in the input file
};

// Reads the next double-quoted field from `file` and returns its contents
// without the quotes. Anything before the opening quote is ignored; if the
// stream ends first, the characters gathered so far are returned.
string getInput(ifstream& file) {
    string field;
    bool insideQuotes = false;
    for(char ch; file.get(ch); ) {
        if(ch != '\"') {
            if(insideQuotes) field.push_back(ch);
            continue;
        }
        if(insideQuotes) return field;  // closing quote
        insideQuotes = true;            // opening quote
    }
    return field;
}

// Device-side substring test: true when NUL-terminated str2 occurs inside
// NUL-terminated str1. An empty str2 matches any non-empty str1; an empty
// str1 never matches.
__device__ bool check(char* str1, char* str2) {
    for(char* start = str1; *start != '\0'; ++start) {
        char* t = start;
        char* p = str2;
        // Advance both cursors while the pattern continues and agrees.
        while(*p != '\0' && *t == *p) {
            ++t;
            ++p;
        }
        if(*p == '\0') return true;   // pattern fully matched
    }
    return false;
}

// One thread per contact: thread t handles index t + offset and writes 1 to
// results[index] if the contact's name contains `pat`, else 0. Indices at or
// past totalContacts are skipped.
__global__ void myKernel(Contact* phoneBook, char* pat, int offset, int totalContacts, int* results) {
    const int idx = offset + threadIdx.x;
    if(idx < totalContacts) {
        const bool found = check(phoneBook[idx].name, pat);
        results[idx] = found ? 1 : 0;
    }
}

// Entry point: <program> <search_term...> <thread_limit>
//
// Loads every contact from the dataset file (remembering its 1-based record
// number), flags on the GPU the contacts whose name contains the search term
// (myKernel, batches of at most <thread_limit> threads), then prints the
// matches sorted by name together with their line numbers. Returns 1 on bad
// arguments or an unreadable dataset; CUDA failures terminate the program
// with a message.
int main(int argc, char* argv[]) {
    if(argc < 3) {
        cerr << "Usage: " << argv[0] << " <search_term> <thread_limit>" << endl;
        return 1;
    }

    // Stop with a readable message as soon as any CUDA call fails; a failed
    // launch would otherwise leave the result buffer uninitialised.
    auto cudaOk = [](cudaError_t status, const char* what) {
        if(status != cudaSuccess) {
            cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << endl;
            exit(1);
        }
    };

    int threadLimit = atoi(argv[argc-1]);
    if(threadLimit <= 0) {
        // A non-positive batch size would never shrink the remaining count.
        cerr << "Error: <thread_limit> must be a positive integer." << endl;
        return 1;
    }
    // A block cannot hold more threads than the device allows; clamp the
    // request rather than letting every launch fail.
    int device = 0;
    int maxThreads = 0;
    cudaOk(cudaGetDevice(&device), "get current device");
    cudaOk(cudaDeviceGetAttribute(&maxThreads, cudaDevAttrMaxThreadsPerBlock, device),
           "query max threads per block");
    threadLimit = min(threadLimit, maxThreads);

    ifstream myfile("/content/drive/MyDrive/Parallel_Dataset/labtest_dataset1.txt");
    if(!myfile) {
        cerr << "Error: could not open the dataset file." << endl;
        return 1;
    }
    vector<Contact> phoneBook;

    int line_number = 0;
    while(myfile.peek() != EOF) {
        line_number++;
        string name = getInput(myfile);
        string phoneNum = getInput(myfile);
        // Two empty fields mean only trailing text was left in the file.
        if(name.empty() && phoneNum.empty()) break;

        Contact c;
        strncpy(c.name, name.c_str(), 64);
        c.name[64] = '\0';
        strncpy(c.phone_number, phoneNum.c_str(), 64);
        c.phone_number[64] = '\0';
        c.line_number = line_number;

        phoneBook.push_back(c);
    }

    // Concatenate search term (handles multi-word input)
    string search_name;
    for(int i = 1; i < argc - 1; i++) {
        if(i > 1) search_name += " ";
        search_name += argv[i];
    }

    char pat[65];
    strncpy(pat, search_name.c_str(), 64);
    pat[64] = '\0';

    int n = phoneBook.size();
    vector<int> h_results(n, 0);

    if(n > 0) {
        char* d_pat = nullptr;
        Contact* d_phoneBook = nullptr;
        int* d_results = nullptr;
        cudaOk(cudaMalloc(&d_pat, sizeof(pat)), "allocate pattern");
        cudaOk(cudaMalloc(&d_phoneBook, n * sizeof(Contact)), "allocate phone book");
        cudaOk(cudaMalloc(&d_results, n * sizeof(int)), "allocate results");
        cudaOk(cudaMemcpy(d_pat, pat, sizeof(pat), cudaMemcpyHostToDevice), "copy pattern");
        cudaOk(cudaMemcpy(d_phoneBook, phoneBook.data(), n * sizeof(Contact), cudaMemcpyHostToDevice),
               "copy phone book");

        // One single-block launch per batch. Launches on the same stream run
        // in order, so a single synchronization after the loop is enough;
        // synchronizing after every batch only adds host/device round trips.
        int remaining = n;
        int offset = 0;
        while(remaining > 0) {
            int batchSize = min(threadLimit, remaining);
            myKernel<<<1, batchSize>>>(d_phoneBook, d_pat, offset, n, d_results);
            cudaOk(cudaGetLastError(), "launch myKernel");

            remaining -= batchSize;
            offset += batchSize;
        }
        cudaOk(cudaDeviceSynchronize(), "run myKernel");

        cudaOk(cudaMemcpy(h_results.data(), d_results, n * sizeof(int), cudaMemcpyDeviceToHost),
               "copy results");

        cudaFree(d_results);
        cudaFree(d_phoneBook);
        cudaFree(d_pat);
    }

    // Collect matches
    vector<Contact> matched;
    for(int i = 0; i < n; i++) {
        if(h_results[i] == 1) {
            matched.push_back(phoneBook[i]);
        }
    }

    // Sort ascending by name
    sort(matched.begin(), matched.end(), [](const Contact& a, const Contact& b) {
        return string(a.name) < string(b.name);
    });

    // Print results with line numbers
    cout << "Search Results (Ascending Order):" << endl;
    for(auto &c : matched) {
        cout << "Line " << c.line_number << ": " << c.name << " " << c.phone_number << endl;
    }

    return 0;
}


Overwriting p3.cu


In [None]:
!nvcc -arch=sm_75 p3.cu -o p3

In [None]:
!time ./p3 'SUMIYA' 10 > o3.txt && sleep 2


real	0m32.315s
user	0m28.422s
sys	0m3.589s


In [None]:
!time ./p3 'CHOWDHURY' 1 > output.txt && sleep 2


real	3m14.202s
user	3m8.684s
sys	0m4.073s


In [None]:
%%writefile p4.cu
/*
Longest substring, with line number.
*/

#include <bits/stdc++.h>
#include <cuda.h>
using namespace std;

// One phone-book record. The text fields are fixed 65-byte buffers (at most
// 64 characters plus the terminating '\0').
struct Contact {
    char name[64 + 1];
    char phone_number[64 + 1];
    int line_number;   // 1-based position of the record in the input file
};

// Extracts the next double-quoted field from `file`, without the quotes.
// Text in front of the opening quote is skipped. Hitting end of file before
// the closing quote returns what has been read so far (possibly nothing).
string getInput(ifstream& file) {
    string field;
    char ch;

    // Skip up to and including the opening quote.
    while(file.get(ch)) {
        if(ch == '\"') break;
    }

    // Collect until the closing quote or end of file.
    while(file.get(ch)) {
        if(ch == '\"') break;
        field.push_back(ch);
    }
    return field;
}

// Device-side longest common substring length (case-sensitive) between the
// NUL-terminated strings `text` and `pat`. Brute force: for every pair of
// start positions, count how many consecutive characters agree and keep the
// largest count. Returns 0 when either string is empty or nothing matches.
__device__ int longestSubstring(char* text, char* pat) {
    int best = 0;
    for(char* t = text; *t != '\0'; ++t) {
        for(char* p = pat; *p != '\0'; ++p) {
            int run = 0;
            for(; t[run] != '\0' && p[run] != '\0' && t[run] == p[run]; ++run) {
                // counting only
            }
            best = (run > best) ? run : best;
        }
    }
    return best;
}

// One thread per contact: thread t handles index t + offset and stores in
// results[index] the length of the longest substring shared by the contact's
// name and `pat`. Indices at or past totalContacts are skipped.
__global__ void myKernel(Contact* phoneBook, char* pat, int offset, int totalContacts, int* results) {
    const int idx = offset + threadIdx.x;
    if(idx < totalContacts) {
        results[idx] = longestSubstring(phoneBook[idx].name, pat);
    }
}

// Entry point: <program> <search_term...> <thread_limit>
//
// Loads every contact from the dataset file, computes on the GPU the length
// of the longest substring each name shares with the search term (myKernel,
// batches of at most <thread_limit> threads), and prints the contacts that
// reach the overall maximum, sorted by name. Returns 1 on bad arguments or an
// unreadable dataset; CUDA failures terminate the program with a message.
int main(int argc, char* argv[]) {
    if(argc < 3) {
        cerr << "Usage: " << argv[0] << " <search_term> <thread_limit>" << endl;
        return 1;
    }

    // Stop with a readable message as soon as any CUDA call fails; a failed
    // launch would otherwise leave the result buffer uninitialised.
    auto cudaOk = [](cudaError_t status, const char* what) {
        if(status != cudaSuccess) {
            cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << endl;
            exit(1);
        }
    };

    int threadLimit = atoi(argv[argc-1]);
    if(threadLimit <= 0) {
        // A non-positive batch size would never shrink the remaining count.
        cerr << "Error: <thread_limit> must be a positive integer." << endl;
        return 1;
    }
    // A block cannot hold more threads than the device allows; clamp the
    // request rather than letting every launch fail.
    int device = 0;
    int maxThreads = 0;
    cudaOk(cudaGetDevice(&device), "get current device");
    cudaOk(cudaDeviceGetAttribute(&maxThreads, cudaDevAttrMaxThreadsPerBlock, device),
           "query max threads per block");
    threadLimit = min(threadLimit, maxThreads);

    ifstream myfile("/content/drive/MyDrive/Parallel_Dataset/labtest_dataset1.txt");
    if(!myfile) {
        cerr << "Error: could not open the dataset file." << endl;
        return 1;
    }
    vector<Contact> phoneBook;

    int line_number = 0;
    while(myfile.peek() != EOF) {
        line_number++;
        string name = getInput(myfile);
        string phoneNum = getInput(myfile);
        // Two empty fields mean only trailing text was left in the file.
        if(name.empty() && phoneNum.empty()) break;

        Contact c;
        strncpy(c.name, name.c_str(), 64);
        c.name[64] = '\0';
        strncpy(c.phone_number, phoneNum.c_str(), 64);
        c.phone_number[64] = '\0';
        c.line_number = line_number;

        phoneBook.push_back(c);
    }

    // Concatenate search term (handles multi-word input)
    string search_name;
    for(int i = 1; i < argc - 1; i++) {
        if(i > 1) search_name += " ";
        search_name += argv[i];
    }

    char pat[65];
    strncpy(pat, search_name.c_str(), 64);
    pat[64] = '\0';

    int n = phoneBook.size();
    vector<int> h_results(n, 0);

    if(n > 0) {
        char* d_pat = nullptr;
        Contact* d_phoneBook = nullptr;
        int* d_results = nullptr;
        cudaOk(cudaMalloc(&d_pat, sizeof(pat)), "allocate pattern");
        cudaOk(cudaMalloc(&d_phoneBook, n * sizeof(Contact)), "allocate phone book");
        cudaOk(cudaMalloc(&d_results, n * sizeof(int)), "allocate results");
        cudaOk(cudaMemcpy(d_pat, pat, sizeof(pat), cudaMemcpyHostToDevice), "copy pattern");
        cudaOk(cudaMemcpy(d_phoneBook, phoneBook.data(), n * sizeof(Contact), cudaMemcpyHostToDevice),
               "copy phone book");

        // One single-block launch per batch. Launches on the same stream run
        // in order, so a single synchronization after the loop is enough.
        int remaining = n;
        int offset = 0;
        while(remaining > 0) {
            int batchSize = min(threadLimit, remaining);
            myKernel<<<1, batchSize>>>(d_phoneBook, d_pat, offset, n, d_results);
            cudaOk(cudaGetLastError(), "launch myKernel");

            remaining -= batchSize;
            offset += batchSize;
        }
        cudaOk(cudaDeviceSynchronize(), "run myKernel");

        cudaOk(cudaMemcpy(h_results.data(), d_results, n * sizeof(int), cudaMemcpyDeviceToHost),
               "copy results");

        cudaFree(d_results);
        cudaFree(d_phoneBook);
        cudaFree(d_pat);
    }

    // Find max length
    int maxLen = 0;
    for(int i = 0; i < n; i++) {
        if(h_results[i] > maxLen) maxLen = h_results[i];
    }

    // Collect only contacts with longest substring
    vector<Contact> matched;
    for(int i = 0; i < n; i++) {
        if(h_results[i] == maxLen && maxLen > 0) {
            matched.push_back(phoneBook[i]);
        }
    }

    // Sort ascending by name
    sort(matched.begin(), matched.end(), [](const Contact& a, const Contact& b){
        return string(a.name) < string(b.name);
    });

    // Print results
    cout << "Longest substring length = " << maxLen << endl;
    cout << "Contacts containing the longest substring:" << endl;
    for(auto &c : matched) {
        cout << "Line " << c.line_number << ": " << c.name << " " << c.phone_number << endl;
    }

    return 0;
}


Overwriting p4.cu


In [None]:
!nvcc -arch=sm_75 p4.cu -o p4

In [None]:
!time ./p4 'KRISNA SULTANA DOLA' 500 > o4.txt && sleep 2


real	0m16.754s
user	0m12.770s
sys	0m3.742s


In [None]:
!time ./p4 'KRISNA SULTANA DOLA' 250 > o4.txt && sleep 2


real	0m18.009s
user	0m13.780s
sys	0m3.839s


In [None]:
!time ./p4 'BIBI MORIOM TURABIN' 50 > o4.txt && sleep 2


real	0m27.587s
user	0m23.364s
sys	0m3.802s


In [None]:
!time ./p4 'BIBI MORIOM KRISNA SULTANA DOLA TURABIN' 50 > o4.txt && sleep 2


real	0m36.784s
user	0m32.508s
sys	0m3.974s


In [None]:
%%writefile p5.cu
/*
Longest substring, case-sensitive.
*/

#include <bits/stdc++.h>
#include <cuda.h>
using namespace std;

// One phone-book record. The text fields are fixed 65-byte buffers (at most
// 64 characters plus the terminating '\0').
struct Contact {
    char name[64 + 1];
    char phone_number[64 + 1];
    int line_number;   // 1-based position of the record in the input file
};

// Reads the next double-quoted field from `file` and returns its contents
// without the quotes. Anything before the opening quote is ignored; if the
// stream ends first, the characters gathered so far are returned.
string getInput(ifstream& file) {
    string field;
    bool insideQuotes = false;
    for(char ch; file.get(ch); ) {
        if(ch != '\"') {
            if(insideQuotes) field.push_back(ch);
            continue;
        }
        if(insideQuotes) return field;  // closing quote
        insideQuotes = true;            // opening quote
    }
    return field;
}

// Device-side, case-sensitive longest common substring between the
// NUL-terminated strings `text` and `pat`.
//
// Every pair of start positions is tried; a run of equal characters replaces
// the current best only when it is strictly longer and is not made up solely
// of spaces. The winning run (the first one found with that length) is copied
// to `outSub` and NUL-terminated; when nothing qualifies outSub becomes the
// empty string. Returns the length of the winning run (0 if none).
// outSub must have room for the run plus the terminator.
__device__ int longestSubstring(char* text, char* pat, char* outSub) {
    int bestLen = 0;
    int bestStart = -1;

    for(int t = 0; text[t] != '\0'; ++t) {
        for(int p = 0; pat[p] != '\0'; ++p) {
            int run = 0;
            while(text[t+run] != '\0' && pat[p+run] != '\0' &&
                  text[t+run] == pat[p+run]) {
                ++run;
            }
            if(run <= bestLen) continue;

            // Count leading spaces; if that covers the whole run, it is a
            // spaces-only match and is ignored.
            int spaces = 0;
            while(spaces < run && text[t+spaces] == ' ') {
                ++spaces;
            }
            if(spaces == run) continue;

            bestLen = run;
            bestStart = t;
        }
    }

    // bestStart stays -1 exactly when bestLen is still 0, so `copied` is 0 in
    // that case and only the terminator is written.
    int copied = 0;
    if(bestStart != -1) {
        for(; copied < bestLen; ++copied) {
            outSub[copied] = text[bestStart + copied];
        }
    }
    outSub[copied] = '\0';
    return bestLen;
}

// One thread per contact: thread t handles index t + offset. It stores the
// longest-common-substring length of the contact's name and `pat` in
// results[index] and the substring itself in the contact's 65-byte slot of
// resultSubs (slot start = index * 65). Indices at or past totalContacts are
// skipped.
__global__ void myKernel(Contact* phoneBook, char* pat, int offset, int totalContacts,
                         int* results, char* resultSubs) {
    const int idx = offset + threadIdx.x;
    if(idx < totalContacts) {
        char* slot = resultSubs + idx * 65;
        results[idx] = longestSubstring(phoneBook[idx].name, pat, slot);
    }
}

// Entry point: <program> <search_term...> <thread_limit>
//
// Loads every contact from the dataset file, computes on the GPU the longest
// substring each name shares with the search term (myKernel, batches of at
// most <thread_limit> threads), and prints the overall longest substring
// together with every contact that reaches that length, in file order.
// Returns 1 on bad arguments or an unreadable dataset; CUDA failures
// terminate the program with a message.
int main(int argc, char* argv[]) {
    if(argc < 3) {
        cerr << "Usage: " << argv[0] << " <search_term> <thread_limit>" << endl;
        return 1;
    }

    // Stop with a readable message as soon as any CUDA call fails; a failed
    // launch would otherwise leave the result buffers uninitialised.
    auto cudaOk = [](cudaError_t status, const char* what) {
        if(status != cudaSuccess) {
            cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << endl;
            exit(1);
        }
    };

    int threadLimit = atoi(argv[argc-1]);
    if(threadLimit <= 0) {
        // A non-positive batch size would never shrink the remaining count.
        cerr << "Error: <thread_limit> must be a positive integer." << endl;
        return 1;
    }
    // A block cannot hold more threads than the device allows; clamp the
    // request rather than letting every launch fail.
    int device = 0;
    int maxThreads = 0;
    cudaOk(cudaGetDevice(&device), "get current device");
    cudaOk(cudaDeviceGetAttribute(&maxThreads, cudaDevAttrMaxThreadsPerBlock, device),
           "query max threads per block");
    threadLimit = min(threadLimit, maxThreads);

    ifstream myfile("/content/drive/MyDrive/Parallel_Dataset/labtest_dataset1.txt");
    if(!myfile) {
        cerr << "Error: could not open the dataset file." << endl;
        return 1;
    }
    vector<Contact> phoneBook;

    int line_number = 0;
    while(myfile.peek() != EOF) {
        line_number++;
        string name = getInput(myfile);
        string phoneNum = getInput(myfile);
        // Two empty fields mean only trailing text was left in the file.
        if(name.empty() && phoneNum.empty()) break;

        Contact c;
        strncpy(c.name, name.c_str(), 64);
        c.name[64] = '\0';
        strncpy(c.phone_number, phoneNum.c_str(), 64);
        c.phone_number[64] = '\0';
        c.line_number = line_number;

        phoneBook.push_back(c);
    }

    // Concatenate search term
    string search_name;
    for(int i = 1; i < argc - 1; i++) {
        if(i > 1) search_name += " ";
        search_name += argv[i];
    }

    char pat[65];
    strncpy(pat, search_name.c_str(), 64);
    pat[64] = '\0';

    int n = phoneBook.size();
    const size_t slotSize = 65;                       // bytes per substring slot
    vector<int> h_results(n, 0);
    vector<char> h_resultSubs((size_t)n * slotSize, '\0');

    if(n > 0) {
        char* d_pat = nullptr;
        Contact* d_phoneBook = nullptr;
        int* d_results = nullptr;
        char* d_resultSubs = nullptr;
        cudaOk(cudaMalloc(&d_pat, sizeof(pat)), "allocate pattern");
        cudaOk(cudaMalloc(&d_phoneBook, n * sizeof(Contact)), "allocate phone book");
        cudaOk(cudaMalloc(&d_results, n * sizeof(int)), "allocate results");
        cudaOk(cudaMalloc(&d_resultSubs, n * slotSize), "allocate substrings");
        cudaOk(cudaMemcpy(d_pat, pat, sizeof(pat), cudaMemcpyHostToDevice), "copy pattern");
        cudaOk(cudaMemcpy(d_phoneBook, phoneBook.data(), n * sizeof(Contact), cudaMemcpyHostToDevice),
               "copy phone book");

        // One single-block launch per batch. Launches on the same stream run
        // in order, so a single synchronization after the loop is enough.
        int remaining = n;
        int offset = 0;
        while(remaining > 0) {
            int batchSize = min(threadLimit, remaining);
            myKernel<<<1, batchSize>>>(d_phoneBook, d_pat, offset, n, d_results, d_resultSubs);
            cudaOk(cudaGetLastError(), "launch myKernel");

            remaining -= batchSize;
            offset += batchSize;
        }
        cudaOk(cudaDeviceSynchronize(), "run myKernel");

        cudaOk(cudaMemcpy(h_results.data(), d_results, n * sizeof(int), cudaMemcpyDeviceToHost),
               "copy results");
        cudaOk(cudaMemcpy(h_resultSubs.data(), d_resultSubs, n * slotSize, cudaMemcpyDeviceToHost),
               "copy substrings");

        cudaFree(d_results);
        cudaFree(d_resultSubs);
        cudaFree(d_phoneBook);
        cudaFree(d_pat);
    }

    // Find global max length
    int maxLen = 0;
    for(int i = 0; i < n; i++) {
        if(h_results[i] > maxLen) {
            maxLen = h_results[i];
        }
    }

    // Remember the phone-book index of every contact reaching that length; the
    // index is also what locates the contact's slot in h_resultSubs.
    vector<int> matchedIdx;
    for(int i = 0; i < n; i++) {
        if(maxLen > 0 && h_results[i] == maxLen) {
            matchedIdx.push_back(i);
        }
    }

    // Print the longest substring only once
    if(!matchedIdx.empty()) {
        // Take the substring from the first matched contact. The slot starts
        // at index * 65; the former expression `line_number - 1 * 65` parsed
        // as `line_number - 65` and read the wrong (possibly out-of-bounds)
        // bytes.
        string longestSub(&h_resultSubs[(size_t)matchedIdx[0] * slotSize]);

        cout << "Longest substring length (case-sensitive, ignoring spaces) = " << maxLen << endl;
        cout << "Longest substring match: \"" << longestSub << "\"" << endl;
        cout << "Contacts containing this substring:" << endl;

        for(int idx : matchedIdx) {
            const Contact& c = phoneBook[idx];
            cout << "Line " << c.line_number << ": "
                 << c.name << " " << c.phone_number << endl;
        }
    } else {
        cout << "No valid substring match found." << endl;
    }

    return 0;
}


Overwriting p5.cu


In [None]:
!nvcc -arch=sm_75 p5.cu -o p5

In [None]:
!time ./p5 'ANTU RANI DEY' 500 > o5.txt && sleep 2


real	0m16.966s
user	0m12.386s
sys	0m4.331s


In [None]:
%%writefile p6.cu
/* Case-insensitive matching. */

/* Longest-substring search that ignores letter case. */
#include <bits/stdc++.h>
#include <cuda.h>
using namespace std;

// One phone-book record. The text fields are fixed 65-byte buffers (at most
// 64 characters plus the terminating '\0').
struct Contact {
    char name[64 + 1];
    char phone_number[64 + 1];
    int line_number;   // 1-based position of the record in the input file
};

// Extracts the next double-quoted field from `file`, without the quotes.
// Text in front of the opening quote is skipped. Hitting end of file before
// the closing quote returns what has been read so far (possibly nothing).
string getInput(ifstream& file) {
    string field;
    char ch;

    // Skip up to and including the opening quote.
    while(file.get(ch)) {
        if(ch == '\"') break;
    }

    // Collect until the closing quote or end of file.
    while(file.get(ch)) {
        if(ch == '\"') break;
        field.push_back(ch);
    }
    return field;
}

// Device-side lowercase conversion: maps 'A'..'Z' to 'a'..'z' and returns
// every other character unchanged.
__device__ char toLower(char c) {
    const bool isUpper = (c >= 'A') && (c <= 'Z');
    return isUpper ? static_cast<char>(c - 'A' + 'a') : c;
}

// Device-side longest common substring length between the NUL-terminated
// strings `text` and `pat`, comparing characters through toLower so that
// letter case is ignored. Brute force over every pair of start positions;
// returns 0 when either string is empty or nothing matches.
__device__ int longestSubstring(char* text, char* pat) {
    int best = 0;
    for(char* t = text; *t != '\0'; ++t) {
        for(char* p = pat; *p != '\0'; ++p) {
            int run = 0;
            for(; t[run] != '\0' && p[run] != '\0' &&
                  toLower(t[run]) == toLower(p[run]); ++run) {
                // counting only
            }
            best = (run > best) ? run : best;
        }
    }
    return best;
}

// One thread per contact: thread t handles index t + offset and stores in
// results[index] the length of the longest substring shared by the contact's
// name and `pat`. Indices at or past totalContacts are skipped.
__global__ void myKernel(Contact* phoneBook, char* pat, int offset, int totalContacts, int* results) {
    const int idx = offset + threadIdx.x;
    if(idx < totalContacts) {
        results[idx] = longestSubstring(phoneBook[idx].name, pat);
    }
}

// Entry point: <program> <search_term...> <thread_limit>
//
// Loads every contact from the dataset file, computes on the GPU the length
// of the longest substring each name shares with the search term, ignoring
// letter case (myKernel, batches of at most <thread_limit> threads), and
// prints the contacts that reach the overall maximum, sorted by name.
// Returns 1 on bad arguments or an unreadable dataset; CUDA failures
// terminate the program with a message.
int main(int argc, char* argv[]) {
    if(argc < 3) {
        cerr << "Usage: " << argv[0] << " <search_term> <thread_limit>" << endl;
        return 1;
    }

    // Stop with a readable message as soon as any CUDA call fails; a failed
    // launch would otherwise leave the result buffer uninitialised.
    auto cudaOk = [](cudaError_t status, const char* what) {
        if(status != cudaSuccess) {
            cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << endl;
            exit(1);
        }
    };

    int threadLimit = atoi(argv[argc-1]);
    if(threadLimit <= 0) {
        // A non-positive batch size would never shrink the remaining count.
        cerr << "Error: <thread_limit> must be a positive integer." << endl;
        return 1;
    }
    // A block cannot hold more threads than the device allows; clamp the
    // request rather than letting every launch fail.
    int device = 0;
    int maxThreads = 0;
    cudaOk(cudaGetDevice(&device), "get current device");
    cudaOk(cudaDeviceGetAttribute(&maxThreads, cudaDevAttrMaxThreadsPerBlock, device),
           "query max threads per block");
    threadLimit = min(threadLimit, maxThreads);

    ifstream myfile("/content/drive/MyDrive/Parallel_Dataset/labtest_dataset1.txt");
    if(!myfile) {
        cerr << "Error: could not open the dataset file." << endl;
        return 1;
    }
    vector<Contact> phoneBook;

    int line_number = 0;
    while(myfile.peek() != EOF) {
        line_number++;
        string name = getInput(myfile);
        string phoneNum = getInput(myfile);
        // Two empty fields mean only trailing text was left in the file.
        if(name.empty() && phoneNum.empty()) break;

        Contact c;
        strncpy(c.name, name.c_str(), 64);
        c.name[64] = '\0';
        strncpy(c.phone_number, phoneNum.c_str(), 64);
        c.phone_number[64] = '\0';
        c.line_number = line_number;

        phoneBook.push_back(c);
    }

    // Concatenate search term (handles multi-word input)
    string search_name;
    for(int i = 1; i < argc - 1; i++) {
        if(i > 1) search_name += " ";
        search_name += argv[i];
    }

    char pat[65];
    strncpy(pat, search_name.c_str(), 64);
    pat[64] = '\0';

    int n = phoneBook.size();
    vector<int> h_results(n, 0);

    if(n > 0) {
        char* d_pat = nullptr;
        Contact* d_phoneBook = nullptr;
        int* d_results = nullptr;
        cudaOk(cudaMalloc(&d_pat, sizeof(pat)), "allocate pattern");
        cudaOk(cudaMalloc(&d_phoneBook, n * sizeof(Contact)), "allocate phone book");
        cudaOk(cudaMalloc(&d_results, n * sizeof(int)), "allocate results");
        cudaOk(cudaMemcpy(d_pat, pat, sizeof(pat), cudaMemcpyHostToDevice), "copy pattern");
        cudaOk(cudaMemcpy(d_phoneBook, phoneBook.data(), n * sizeof(Contact), cudaMemcpyHostToDevice),
               "copy phone book");

        // One single-block launch per batch. Launches on the same stream run
        // in order, so a single synchronization after the loop is enough.
        int remaining = n;
        int offset = 0;
        while(remaining > 0) {
            int batchSize = min(threadLimit, remaining);
            myKernel<<<1, batchSize>>>(d_phoneBook, d_pat, offset, n, d_results);
            cudaOk(cudaGetLastError(), "launch myKernel");

            remaining -= batchSize;
            offset += batchSize;
        }
        cudaOk(cudaDeviceSynchronize(), "run myKernel");

        cudaOk(cudaMemcpy(h_results.data(), d_results, n * sizeof(int), cudaMemcpyDeviceToHost),
               "copy results");

        cudaFree(d_results);
        cudaFree(d_phoneBook);
        cudaFree(d_pat);
    }

    // Find max length
    int maxLen = 0;
    for(int i = 0; i < n; i++) {
        if(h_results[i] > maxLen) maxLen = h_results[i];
    }

    // Collect only contacts with longest substring
    vector<Contact> matched;
    for(int i = 0; i < n; i++) {
        if(h_results[i] == maxLen && maxLen > 0) {
            matched.push_back(phoneBook[i]);
        }
    }

    // Sort ascending by name
    sort(matched.begin(), matched.end(), [](const Contact& a, const Contact& b){
        return string(a.name) < string(b.name);
    });

    // Print results
    cout << "Longest substring length (case-insensitive) = " << maxLen << endl;
    cout << "Contacts containing the longest substring:" << endl;
    for(auto &c : matched) {
        cout << "Line " << c.line_number << ": " << c.name << " " << c.phone_number << endl;
    }

    return 0;
}


Overwriting p5.cu


In [None]:
!nvcc -arch=sm_75 p6.cu -o p6

In [None]:
!time ./p6 'KRISNA Miss SULTANA DOLA' 500 > o5.txt && sleep 2


real	0m40.575s
user	0m32.543s
sys	0m7.613s
