<a href="https://colab.research.google.com/github/Syncrei/SmithWatermanCuda/blob/main/smithwatermancuda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!wget https://raw.githubusercontent.com/Syncrei/SmithWatermanCuda/main/Test/query1.txt
!wget https://raw.githubusercontent.com/Syncrei/SmithWatermanCuda/main/Test/query.txt
!wget https://raw.githubusercontent.com/Syncrei/SmithWatermanCuda/main/Test/db1.txt
!wget https://raw.githubusercontent.com/Syncrei/SmithWatermanCuda/main/Test/db.txt

In [None]:
%%writefile sw.cu

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <sstream>
#include <stdio.h>
#include <string>
#include <utility>
#include <vector>

#include <thrust/device_vector.h>

// Strict-weak-ordering predicate for std::sort: places the pair with the larger
// `second` member first. Both members are int so that values are compared as
// stored; a narrower second type would truncate every value above its range
// during the implicit pair conversion and corrupt the ordering.
bool compareSecondElementDescending(const std::pair<int, int>& a, const std::pair<int, int>& b) {
    return a.second > b.second;
}

// Reads one "index|sequence" line of the database file into index / seq.
// Returns false when no further record is read.
bool ReadFromFileDB(const std::string& pathToFile, int& index, std::string& seq);
// Appends database records to indexDb / indexChar / seqDb while the estimated
// weight of the batch stays below maxWeight, and raises maxLen to the longest
// sequence appended. Returns false when there is nothing left to process.
bool ReadDBLimited(const std::string& pathToFile, const long long& maxWeight, const int& querySize,int& maxLen,
    std::vector<int>& indexDb, std::vector<int>& indexChar, std::string& seqDb);
// Reads the first line of the file at file_patch into a.
void ReadFileQuery(std::string& a, std::string file_patch);
// Writes the leading entries of scores to file_patch as "first, second" lines,
// limited by max_results.
void WriteFileOut(std::string file_patch, std::vector<std::pair<int, int>> scores, int max_results);

// Aborts the program with file/line context when a CUDA runtime call fails, so
// that a failed allocation or launch cannot silently turn into garbage scores.
#define SW_CUDA_CHECK(call)                                                   \
    do {                                                                      \
        cudaError_t swCudaStatus_ = (call);                                   \
        if (swCudaStatus_ != cudaSuccess) {                                   \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(swCudaStatus_));                       \
            std::exit(EXIT_FAILURE);                                          \
        }                                                                     \
    } while (0)

/*
 * Fills one anti-diagonal (i + j == currentStep + 2) of the local-alignment
 * score matrix of every database sequence in the batch.
 *
 * Launch layout:
 *   blockIdx.x                            -> index k of the database sequence
 *   blockIdx.y * blockDim.x + threadIdx.x -> zero-based column (j - 1)
 *
 * Data layout:
 *   indexChar[k] is the exclusive end offset of sequence k inside dbQuery.
 *   The matrix of sequence k is row-major with (sizeSeq1 + 1) rows and
 *   (len_k + 1) columns and starts (indexChar[k - 1] + k) * (sizeSeq1 + 1)
 *   ints into scoreMatrixDb, i.e. the matrices are packed back to back.
 *
 * Preconditions: scoreMatrixDb is zero-initialised (row 0 / column 0 are read
 * but never written) and the kernel is launched for currentStep = 0, 1, ... in
 * order on one stream, because each diagonal reads the two previous ones.
 */
__global__
void CalculateAllDdScoreKernel(int currentStep, char* query, int sizeSeq1, char* dbQuery, int* indexChar, int* scoreMatrixDb,
    int matchScore, int mismatchScore, int gapScore) {

    const long long seqId = blockIdx.x;
    const long long column = (long long)blockIdx.y * blockDim.x + threadIdx.x;

    const int j = (int)column + 1;
    const int i = currentStep + 2 - j;
    const int startIdx = (seqId == 0) ? 0 : indexChar[seqId - 1];
    const int sizeSeq2 = indexChar[seqId] - startIdx;

    // Threads whose cell is not on this diagonal of this sequence have no work.
    if (i <= 0 || i > sizeSeq1 || j > sizeSeq2) {
        return;
    }

    const char* seq2 = dbQuery + startIdx;
    // 64-bit offsets: the packed matrices of one batch can exceed 2^31 cells.
    int* scoreMatrix = scoreMatrixDb + (startIdx + seqId) * (long long)(sizeSeq1 + 1);
    const long long rowStride = sizeSeq2 + 1;
    const long long rowAbove = (long long)(i - 1) * rowStride;
    const long long rowHere = (long long)i * rowStride;

    const int substitution = (query[i - 1] == seq2[j - 1]) ? matchScore : mismatchScore;
    const int score_diag = scoreMatrix[rowAbove + j - 1] + substitution;
    const int score_up = scoreMatrix[rowAbove + j] + gapScore;
    const int score_left = scoreMatrix[rowHere + j - 1] + gapScore;
    scoreMatrix[rowHere + j] = max(0, max(score_diag, max(score_up, score_left)));
}

/*
 * Reduces the score matrix of each database sequence to its maximum cell.
 *
 * Launch layout: exactly one block per sequence (gridDim.x == number of
 * sequences); the threads of a block stride over that sequence's matrix.
 *
 * Preconditions: maxScore is zero-initialised. Scores are never negative, so
 * zero is the identity of the reduction and threads that only saw zeros skip
 * the atomic. Uses the same packed matrix layout as CalculateAllDdScoreKernel.
 */
__global__
void MaxScoreDbKernel(int* maxScore, int* scoreMatrixDb, int* indexChar, int sizeSeq1)
{
    const long long seqId = blockIdx.x;
    const int startIdx = (seqId == 0) ? 0 : indexChar[seqId - 1];
    const int sizeSeq2 = indexChar[seqId] - startIdx;

    const int* scoreMatrix = scoreMatrixDb + (startIdx + seqId) * (long long)(sizeSeq1 + 1);
    const long long cellCount = (long long)(sizeSeq1 + 1) * (sizeSeq2 + 1);

    int localMax = 0;
    for (long long cell = threadIdx.x; cell < cellCount; cell += blockDim.x) {
        localMax = max(localMax, scoreMatrix[cell]);
    }
    if (localMax > 0) {
        atomicMax(&maxScore[seqId], localMax);
    }
}

/*
 * Scores the query stored in query_patch against every record of the database
 * file db_patch and writes the best max_results (index, score) pairs to
 * out_patch, highest score first.
 *
 * The database is processed in batches sized by ReadDBLimited to fit into 95%
 * of the device memory that is free at start-up. Any CUDA failure aborts the
 * program with a diagnostic instead of producing silently wrong scores.
 */
void CalculateAllDdScore(std::string db_patch, std::string query_patch, std::string out_patch, int max_results) {
    const int matchScore = 1;
    const int mismatchScore = -1;
    const int gapScore = -2;
    const int kMaxThreadsPerBlock = 1024;
    const int kReduceThreads = 256;

    std::string query;
    ReadFileQuery(query, query_patch);
    const int querySize = (int)query.size();

    char* d_query = nullptr;
    // Allocate at least one byte so an empty query still yields a valid pointer.
    SW_CUDA_CHECK(cudaMalloc(&d_query, std::max<size_t>(query.size(), 1)));
    if (!query.empty()) {
        SW_CUDA_CHECK(cudaMemcpy(d_query, query.data(), query.size(), cudaMemcpyHostToDevice));
    }

    size_t freeMemory = 0, totalMemory = 0;
    SW_CUDA_CHECK(cudaMemGetInfo(&freeMemory, &totalMemory));
    const long long memoryBudget = (long long)(freeMemory * 0.95);

    std::vector<int> score;
    std::vector<int> indexes;

    std::vector<int> indexDb;
    std::vector<int> indexChar;
    std::string seqDb;
    int maxLen = querySize;

    while (ReadDBLimited(db_patch, memoryBudget, querySize, maxLen, indexDb, indexChar, seqDb))
    {
        const int countSeq = (int)indexDb.size();

        // A batch without records has nothing to launch (grid dimension 0 is invalid).
        if (countSeq > 0) {
            // One (querySize + 1) x (len + 1) matrix per sequence, packed back to back.
            const size_t matrixCells = (seqDb.size() + (size_t)countSeq) * (query.size() + 1);
            const size_t matrixBytes = matrixCells * sizeof(int);
            const size_t countBytes = (size_t)countSeq * sizeof(int);

            char* d_seqDb = nullptr;
            int* d_indexChar = nullptr;
            int* d_scoreMatrix = nullptr;
            int* d_scoreMax = nullptr;

            SW_CUDA_CHECK(cudaMalloc(&d_seqDb, std::max<size_t>(seqDb.size(), 1)));
            SW_CUDA_CHECK(cudaMalloc(&d_indexChar, countBytes));
            SW_CUDA_CHECK(cudaMalloc(&d_scoreMatrix, matrixBytes));
            SW_CUDA_CHECK(cudaMalloc(&d_scoreMax, countBytes));

            if (!seqDb.empty()) {
                SW_CUDA_CHECK(cudaMemcpy(d_seqDb, seqDb.data(), seqDb.size(), cudaMemcpyHostToDevice));
            }
            SW_CUDA_CHECK(cudaMemcpy(d_indexChar, indexChar.data(), countBytes, cudaMemcpyHostToDevice));
            SW_CUDA_CHECK(cudaMemset(d_scoreMatrix, 0, matrixBytes));
            SW_CUDA_CHECK(cudaMemset(d_scoreMax, 0, countBytes));

            // One thread per matrix column; one kernel launch per anti-diagonal.
            const int threadsPerBlock = std::max(1, std::min(kMaxThreadsPerBlock, maxLen));
            const int columnBlocks = std::max(1, (maxLen + threadsPerBlock - 1) / threadsPerBlock);
            const dim3 numBlocks(countSeq, columnBlocks);
            // Signed arithmetic: an empty query with empty sequences gives -1, i.e. no launches.
            const int diagonalCount = querySize + maxLen - 1;
            for (int step = 0; step < diagonalCount; step++) {
                CalculateAllDdScoreKernel<<<numBlocks, threadsPerBlock>>>(step, d_query, querySize, d_seqDb, d_indexChar,
                    d_scoreMatrix, matchScore, mismatchScore, gapScore);
                SW_CUDA_CHECK(cudaGetLastError());
            }

            // One block per sequence reduces its matrix to the best local score.
            MaxScoreDbKernel<<<countSeq, kReduceThreads>>>(d_scoreMax, d_scoreMatrix, d_indexChar, querySize);
            SW_CUDA_CHECK(cudaGetLastError());

            // The blocking copy also surfaces asynchronous kernel failures.
            std::vector<int> batchScores(countSeq);
            SW_CUDA_CHECK(cudaMemcpy(batchScores.data(), d_scoreMax, countBytes, cudaMemcpyDeviceToHost));

            score.insert(score.end(), batchScores.begin(), batchScores.end());
            indexes.insert(indexes.end(), indexDb.begin(), indexDb.end());

            SW_CUDA_CHECK(cudaFree(d_seqDb));
            SW_CUDA_CHECK(cudaFree(d_indexChar));
            SW_CUDA_CHECK(cudaFree(d_scoreMatrix));
            SW_CUDA_CHECK(cudaFree(d_scoreMax));
        }

        // Reset the batch containers for the next ReadDBLimited call.
        indexDb.clear();
        indexChar.clear();
        seqDb.clear();
        maxLen = querySize;
    }

    SW_CUDA_CHECK(cudaFree(d_query));

    std::vector<std::pair<int, int>> scorAndIndex;
    scorAndIndex.reserve(indexes.size());
    for (size_t k = 0; k < indexes.size(); k++) {
        scorAndIndex.push_back(std::make_pair(indexes[k], score[k]));
    }

    std::sort(scorAndIndex.begin(), scorAndIndex.end(), compareSecondElementDescending);

    WriteFileOut(out_patch, scorAndIndex, max_results);

}


// Command-line entry point.
//   --db <file>          database file (required)
//   --query <file>       query file (required)
//   --out <file>         output file (required)
//   --max_results <n>    number of result lines to write (default 250)
// Unrecognised arguments, and flags given without a value, are ignored.
// Returns 1 when a required option is missing or --max_results is not an
// integer, 0 otherwise.
int32_t main(int argc, char* argv[]) {

    std::string db_name;
    std::string query_name;
    std::string out_name;
    int max_results = 250;

    for (int i = 1; i < argc; i++) {
        const std::string flag = argv[i];
        const bool hasValue = i + 1 < argc;

        if (flag == "--db" && hasValue) {
            db_name = argv[++i];
        }
        else if (flag == "--query" && hasValue) {
            query_name = argv[++i];
        }
        else if (flag == "--out" && hasValue) {
            out_name = argv[++i];
        }
        else if (flag == "--max_results" && hasValue) {
            const std::string value = argv[++i];
            // std::stoi throws on non-numeric or out-of-range text; report it
            // instead of terminating through an uncaught exception.
            try {
                max_results = std::stoi(value);
            }
            catch (const std::exception&) {
                std::cerr << "Error: --max_results expects an integer, got '" << value << "'.\n";
                return 1;
            }
        }
    }

    if (db_name.empty() || query_name.empty() || out_name.empty()) {
        std::cerr << "Error: Missing parameters! You need to provide --db, --query, and --out.\n";
        return 1;
    }

    CalculateAllDdScore(db_name, query_name, out_name, max_results);
    return 0;
}

// Writes at most max_results entries of scores to the file at file_patch, one
// "first, second" line per entry, in the order given. A max_results of zero or
// less produces an empty file. Reports on stderr when the file cannot be
// opened; nothing is written in that case.
void WriteFileOut(std::string file_patch, std::vector<std::pair<int, int>> scores, int max_results) {

    std::ofstream out(file_patch);
    if (!out.is_open()) {
        std::cerr << "Error: cannot open output file '" << file_patch << "'.\n";
        return;
    }

    int written = 0;
    for (const std::pair<int, int>& entry : scores) {
        // Check the limit before writing so that max_results <= 0 emits nothing.
        if (written >= max_results) break;
        out << entry.first << ", " << entry.second << "\n";
        ++written;
    }
}

// Reads the first line of the file at file_patch into a. A trailing carriage
// return (CRLF line ending) is removed so it is not treated as part of the
// sequence. When the file cannot be opened, a is left untouched and an error
// is reported on stderr.
void ReadFileQuery(std::string& a, std::string file_patch) {
    std::ifstream in(file_patch);
    if (!in.is_open()) {
        std::cerr << "Error: cannot open query file '" << file_patch << "'.\n";
        return;
    }

    std::getline(in, a);
    if (!a.empty() && a.back() == '\r') {
        a.pop_back();
    }
}

// Reads the next "index|sequence" record of the database file.
//
// The stream is a function-local static, so pathToFile is only used on the
// first call; later calls continue in the same file whatever path is passed.
//
// On success stores the integer before the first '|' in index and the rest of
// the line in seq, and returns true. Blank lines are skipped, a trailing
// carriage return is removed, and lines without '|' or without a numeric
// index are skipped with a warning on stderr. Returns false once no record
// is left.
bool ReadFromFileDB(const std::string& pathToFile, int& index, std::string& seq) {
    static std::ifstream file(pathToFile);
    static std::string line;

    while (std::getline(file, line)) {
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        if (line.empty()) {
            continue;
        }

        const std::string::size_type bar = line.find('|');
        if (bar == std::string::npos) {
            std::cerr << "Warning: skipping database line without '|': " << line << "\n";
            continue;
        }

        try {
            index = std::stoi(line.substr(0, bar));
        }
        catch (const std::exception&) {
            std::cerr << "Warning: skipping database line with invalid index: " << line << "\n";
            continue;
        }

        seq = line.substr(bar + 1);
        return true;
    }

    file.close();
    return false;
}

// Builds the next batch of database records.
//
// Records obtained from ReadFromFileDB are appended to the batch while its
// estimated weight stays below maxWeight:
//   seqDb     - the sequences concatenated
//   indexDb   - the index of each record
//   indexChar - the exclusive end offset of each record inside seqDb
//   maxLen    - raised to the longest sequence appended
// The weight of a record is its length in bytes, plus a
// (querySize + 1) x (length + 1) matrix of int, plus two int.
//
// The first record of a batch is always accepted, even when it alone exceeds
// maxWeight, so a returned batch is never empty. The record that did not fit
// is kept in function-local statics and opens the next batch.
//
// Returns true when at least one record was appended, false when the database
// is exhausted.
bool ReadDBLimited(const std::string& pathToFile, const long long& maxWeight, const int& querySize, int& maxLen,
    std::vector<int>& indexDb, std::vector<int>& indexChar, std::string& seqDb) {
    static std::string pendingSeq;
    static int pendingIndex = 0;
    // Explicit flag: an empty pending sequence must not be mistaken for "none".
    static bool hasPending = false;

    const auto recordWeight = [querySize](const std::string& s) -> long long {
        const long long len = (long long)s.size();
        return len * (long long)sizeof(char)
            + (1LL + querySize) * (1LL + len) * (long long)sizeof(int)
            + 2LL * (long long)sizeof(int);
    };

    long long weight = 0;
    if (hasPending)
    {
        seqDb += pendingSeq;
        indexDb.push_back(pendingIndex);
        indexChar.push_back((int)seqDb.size());
        weight += recordWeight(pendingSeq);
        maxLen = std::max(maxLen, (int)pendingSeq.size());
        pendingSeq.clear();
        hasPending = false;
    }

    int index;
    std::string seq;

    while (ReadFromFileDB(pathToFile, index, seq))
    {
        const long long tWeight = recordWeight(seq);
        if (indexDb.empty() || tWeight + weight < maxWeight) {
            seqDb += seq;
            indexDb.push_back(index);
            indexChar.push_back((int)seqDb.size());
            weight += tWeight;
            maxLen = std::max(maxLen, (int)seq.size());
        }
        else {
            // Does not fit: keep it for the next call and hand out this batch.
            pendingSeq = seq;
            pendingIndex = index;
            hasPending = true;
            return true;
        }
    }
    // Count records, not characters: a batch of empty sequences is still a batch.
    return !indexDb.empty();
}

In [None]:
!nvcc sw.cu -o sw

In [None]:
%%shell
nvprof ./sw  --db db.txt --query query.txt --out out.txt