<a href="https://colab.research.google.com/github/Anteii/HPC-Labs/blob/main/lab4/MassSearch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Notebook setup

In [2]:
from IPython.display import clear_output

In [3]:
# Print the version of the CUDA compiler driver available in this runtime.
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2021 NVIDIA Corporation
Built on Sun_Feb_14_21:12:58_PST_2021
Cuda compilation tools, release 11.2, V11.2.152
Build cuda_11.2.r11.2/compiler.29618528_0


In [4]:
# Query the NVIDIA driver for attached GPUs.
# NOTE(review): the recorded output shows the driver could not be reached, so this
# session presumably had no GPU attached - confirm the runtime type before the GPU section.
!nvidia-smi

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



In [5]:
# Clone NVIDIA's cuda-samples repository, build the deviceQueryDrv utility with make,
# and run the resulting binary. Paths are hard-coded to /content, so this cell assumes
# the notebook's working directory is /content - TODO confirm outside Colab.
!git clone https://github.com/NVIDIA/cuda-samples.git
!make -C /content/cuda-samples/Samples/1_Utilities/deviceQueryDrv/
!/content/cuda-samples/bin/x86_64/linux/release/deviceQueryDrv

Cloning into 'cuda-samples'...
remote: Enumerating objects: 11024, done.[K
remote: Counting objects: 100% (11024/11024), done.[K
remote: Compressing objects: 100% (1839/1839), done.[K
remote: Total 11024 (delta 9184), reused 10979 (delta 9159), pack-reused 0[K
Receiving objects: 100% (11024/11024), 127.03 MiB | 18.12 MiB/s, done.
Resolving deltas: 100% (9184/9184), done.
Checking out files: 100% (3615/3615), done.
make: Entering directory '/content/cuda-samples/Samples/1_Utilities/deviceQueryDrv'
/usr/local/cuda/bin/nvcc -ccbin g++ -I../../../Common  -m64    --threads 0 --std=c++11 -gencode arch=compute_35,code=compute_35 -o deviceQueryDrv.o -c deviceQueryDrv.cpp
/usr/local/cuda/bin/nvcc -ccbin g++   -m64      -gencode arch=compute_35,code=compute_35 -o deviceQueryDrv deviceQueryDrv.o  -L/usr/local/cuda/lib64/stubs -lcuda
mkdir -p ../../../bin/x86_64/linux/release
cp deviceQueryDrv ../../../bin/x86_64/linux/release
make: Leaving directory '/content/cuda-samples/Samples/1_Utilities/

In [6]:
# Directory that the %%writefile cells write their sources into.
# -p keeps the cell re-runnable: plain mkdir errors out once src already exists.
!mkdir -p src

# Python (CPU)

In [7]:
import numpy as np
np.random.seed(41)

def generate_sub(alphabet, h_min, h_max):
  """Draw one random pattern (substring) over ``alphabet``.

  The pattern length is sampled with ``np.random.randint`` from ``h_min`` up to,
  but not including, ``h_max``; the symbols are then sampled with
  ``np.random.choice``. Both draws use NumPy's global random state.

  Args:
    alphabet: 1-D collection of symbols to sample from.
    h_min: smallest allowed pattern length (inclusive).
    h_max: upper bound on the pattern length (exclusive).

  Returns:
    A 1-D array of symbols taken from ``alphabet``.
  """
  # The length is kept as a one-element array and doubles as the output shape.
  length = np.random.randint(low=h_min, high=h_max, size=1)
  return np.random.choice(a=alphabet, size=length)

def gen_d(alphabet, subs):
  """Build the symbol -> occurrences lookup table used by ``iterate``.

  Args:
    alphabet: iterable of all symbols; each one gets an entry, possibly empty.
    subs: sequence of patterns, each an indexable sequence of symbols.

  Returns:
    A dict mapping every symbol to a list of ``(pattern index, offset)`` tuples,
    one tuple per occurrence of that symbol inside a pattern.

  Raises:
    KeyError: if a pattern contains a symbol that is not in ``alphabet``.
  """
  table = {}
  for symbol in alphabet:
    table[symbol] = []
  for sub_no, pattern in enumerate(subs):
    for offset in range(len(pattern)):
      table[pattern[offset]].append((sub_no, offset))
  return table

def gen_working_mat(subs, N, H):
  """Create the counter matrix: row ``i`` holds ``len(subs[i])`` in each of ``H`` columns.

  Args:
    subs: sequence of patterns.
    N: not used by the body; the row count comes from ``len(subs)``.
    H: number of columns (one per text position).

  Returns:
    A 2-D array of shape ``(len(subs), H)``.
  """
  lengths = np.array(list(map(len, subs)))
  # Turn the lengths into a column vector, then copy that column H times.
  column = lengths[:, np.newaxis]
  return np.repeat(column, H, axis=1)

def iterate(text, d, mat):
  """Scan ``text`` once and decrement the per-alignment match counters in ``mat``.

  For the symbol at text position ``ind`` and every ``(pattern, offset)`` pair
  registered for it in ``d``, the counter of the alignment that starts at
  ``ind - offset`` is decremented. A counter that reaches zero means every
  symbol of that pattern matched at that start position.

  Args:
    text: sequence of symbols to search in.
    d: lookup table as produced by ``gen_d``.
    mat: counter matrix as produced by ``gen_working_mat``; modified in place.

  Returns:
    The same ``mat`` object, after the update.

  Raises:
    KeyError: if ``text`` contains a symbol that has no entry in ``d``.
  """
  for ind, ch in enumerate(text):
    for sub_ind, ch_ind in d[ch]:
      start = ind - ch_ind
      # A negative start means the pattern would begin before the text. NumPy
      # would wrap such an index to the end of the row and decrement an
      # unrelated counter, which can produce false matches - skip it instead.
      if start < 0:
        continue
      mat[sub_ind, start] -= 1
  return mat

def is_contain(mat):
  """Return one boolean per row of ``mat``: True when that row contains a zero."""
  zero_cells = mat == 0
  return np.any(zero_cells, axis=1)

def find_indices(mat):
  """List the coordinates of every zero in the 2-D matrix ``mat``.

  Returns:
    An array of shape ``(k, 2)`` whose rows are ``(row index, column index)``
    pairs, in row-major order.
  """
  rows, cols = np.nonzero(mat == 0)
  return np.column_stack((rows, cols))

In [8]:
# Alphabet: the S integer symbols 0 .. S-1, stored as unsigned bytes
S = 6
alphabet = np.arange(S, dtype=np.uint8)
# Example
#S = 6
#alphabet = ["a", "b", "c", "d", "e", "f"]

In [9]:
# Define text: H symbols sampled from the alphabet with np.random.choice.
# The draw uses NumPy's global random state (seeded in the cell that defines the
# helper functions), so the result depends on the order in which cells are run.
H = 100
text = np.random.choice(alphabet, size=H)

# Example
#H = 6
#text = np.array(["a", "a", "e", "f", "e", "d"])

In [10]:
# Pattern lengths are drawn from h_min up to, but excluding, h_max
# (generate_sub passes them straight to np.random.randint).
h_min, h_max = 1, 20
# Number of patterns to search for
N = 10
subs = [generate_sub(alphabet, h_min, h_max) for i in range(N)]

# Example
#N = 3
#subs = [np.array(["a", "a"]), np.array(["a"]), np.array(["f", "e", "d"])]

In [11]:
# Preprocessing: a symbol -> [(pattern index, offset), ...] lookup table, and a
# counter matrix with one row per pattern and H columns, row i filled with len(subs[i]).
d = gen_d(alphabet, subs)
mat = gen_working_mat(subs, N, H)

In [12]:
# iterate() decrements its matrix argument in place, so feeding it the previous
# `mat` would subtract a second time whenever this cell is re-run. Start from a
# freshly built counter matrix to keep the cell idempotent.
mat = iterate(text, d, gen_working_mat(subs, N, H))

In [13]:
# One flag per pattern: True when some counter in that pattern's row reached zero.
is_contain(mat)

array([False, False, False, False, False,  True, False, False, False,
       False])

In [None]:
# (pattern index, text position) pairs for every counter that reached zero.
find_indices(mat)

# C++ (CPU)

In [330]:
%%writefile src/cpu.cpp
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

using namespace std;

// Selects how search() reports its result. The numeric values are what main()
// reads from the command line and what search() writes to the output header.
enum SearchType{
  Indicies = 0,  // report every (substring index, text position) match
  Entries = 1    // report one found / not-found flag per substring
};

// Returns the length of every substring, in the same order as `subs`.
template <typename T>
vector<int> getSubSizes(const vector<vector<T>>& subs){
  vector<int> lengths;
  lengths.reserve(subs.size());
  for (const auto& sub : subs){
    lengths.push_back(static_cast<int>(sub.size()));
  }
  return lengths;
}

// Builds the lookup table used by iterate(): for every symbol, the list of
// (substring index, offset inside that substring) pairs where it occurs.
//
// alphabet - every symbol gets an entry, even if no substring contains it.
// subs     - the substrings to index. Symbols that are not in `alphabet` still
//            get an entry, created on first use.
template <typename T>
map<T, vector<pair<int, int>>> generateDict(const vector<T>& alphabet, const vector<vector<T>>& subs){
  map<T, vector<pair<int, int>>> dict;
  for (const auto& chr : alphabet){
    dict[chr];  // operator[] default-constructs the empty occurrence list
  }

  for (size_t sub_ind = 0; sub_ind < subs.size(); ++sub_ind){
    // Bind by reference: copying the whole substring on every pass is wasted work.
    const vector<T>& sub = subs[sub_ind];
    for (size_t chr_ind = 0; chr_ind < sub.size(); ++chr_ind){
      dict[sub[chr_ind]].push_back({static_cast<int>(sub_ind), static_cast<int>(chr_ind)});
    }
  }

  return dict;
}

// Creates the counter matrix: one row per substring, `textSize` columns, with
// every cell of row i initialised to subsSizes[i].
vector<vector<int>> generateMatrix(const vector<int>& subsSizes, int textSize){
  vector<vector<int>> mat;
  mat.reserve(subsSizes.size());
  for (auto it = subsSizes.begin(); it != subsSizes.end(); ++it){
    mat.push_back(vector<int>(textSize, *it));
  }
  return mat;
}

// Produces `size` symbols, each picked from `alphabet` with one rand() call.
// The sequence therefore depends on the current srand() seed.
template <typename T>
vector<T> generateText(const vector<T>& alphabet, int size){
  vector<T> text(size);
  for (auto&& symbol : text){
    symbol = alphabet[rand() % alphabet.size()];
  }
  return text;
}

// Generates `n` random substrings over `alphabet`. Each length is drawn with
// rand() from sizeMin to sizeMax (both inclusive), then the symbols are drawn
// by generateText().
template <typename T>
vector<vector<T>> generateSubs(const vector<T>& alphabet, int n, int sizeMin, int sizeMax){
  vector<vector<T>> subs;

  int produced = 0;
  while (produced < n){
    const int length = sizeMin + rand() % (sizeMax - sizeMin + 1);
    subs.push_back(generateText(alphabet, length));
    ++produced;
  }

  return subs;
}

// Scans `text` once and updates the per-alignment match counters.
//
// For the symbol at text position chr_ind and every (substring, offset) pair
// registered for it in `dict`, the counter of the alignment starting at
// chr_ind - offset is decremented. A counter that reaches zero means the whole
// substring matched at that start position.
//
// Throws std::out_of_range (from map::at) if `text` contains a symbol that has
// no entry in `dict`.
template <typename T>
void iterate(const vector<T>& text, const map<T, vector<pair<int, int>>>& dict,
            vector<vector<int>>& matrix){
  const int textSize = static_cast<int>(text.size());
  for (int chr_ind = 0; chr_ind < textSize; ++chr_ind){
    for (const auto& entry : dict.at(text[chr_ind])){
      const int start = chr_ind - entry.second;
      // A negative start means the substring would begin before the text;
      // indexing the row with it would write outside the vector.
      if (start < 0){
        continue;
      }
      matrix[entry.first][start]--;
    }
  }
}

// Collects every (substring index, text position) whose counter is zero, in
// row-major order. Returns an empty list for an empty matrix.
vector<pair<int, int>> findIndices(const vector<vector<int>>& mat){
  vector<pair<int, int>> result;

  for (size_t subInd = 0; subInd < mat.size(); ++subInd){
    // Use the row's own length: reading mat[0] is invalid when there are no rows.
    const vector<int>& row = mat[subInd];
    for (size_t textPos = 0; textPos < row.size(); ++textPos){
      if (row[textPos] == 0){
        result.push_back({static_cast<int>(subInd), static_cast<int>(textPos)});
      }
    }
  }

  return result;
}

// Returns one flag per substring: true when at least one counter in that
// substring's row is zero. Returns an empty list for an empty matrix.
vector<bool> findEntries(const vector<vector<int>>& mat){

  vector<bool> result(mat.size(), false);

  for (size_t subInd = 0; subInd < mat.size(); ++subInd){
    // Iterate the row itself: reading mat[0] is invalid when there are no rows.
    for (int counter : mat[subInd]){
      if (counter == 0){
        result[subInd] = true;
        break;  // one hit is enough for this substring
      }
    }
  }

  return result;
}

// Writes the pairs as "{(a, b), (c, d)}"; an empty list is written as "{}".
ostream& operator<<(ostream& os, const vector<pair<int, int>>& pairs){
    os << "{";
    for (size_t i = 0; i < pairs.size(); ++i){
      if (i != 0){
        os << ", ";
      }
      os << "(" << pairs[i].first << ", " << pairs[i].second << ")";
    }
    // Emitted unconditionally so that an empty list still gets its closing brace.
    os << "}";

    return os;
}

// Writes the flags as "{True, False}"; an empty list is written as "{}".
ostream& operator<<(ostream& os, const vector<bool>& flags){
    os << "{";
    for (size_t i = 0; i < flags.size(); ++i){
      if (i != 0){
        os << ", ";
      }
      os << (flags[i] ? "True" : "False");
    }
    // Emitted unconditionally so that an empty list still gets its closing brace.
    os << "}";

    return os;
}

// Writes the symbols of `text` on one line, separated by single spaces, and
// ends the line with endl (which also flushes the stream).
template<typename T>
void printText(const vector<T>& text, ostream& os){
   bool needSeparator = false;
   for (const auto& symbol : text){
     if (needSeparator){
       os << " ";
     }
     os << (T)symbol;
     needSeparator = true;
   }
   os << endl;
}

// Writes every substring on its own line using printText().
template<typename T>
void printSubs(const vector<vector<T>>& subs, ostream& os){
   // Iterate by const reference: `auto sub` would deep-copy each substring.
   for (const auto& sub : subs){
     printText(sub, os);
   }
}

// Runs one randomized multi-substring search and writes everything to "data.txt".
//
// textSize                - number of symbols in the generated text.
// subN                    - number of substrings to generate and search for.
// subSizeMin / subSizeMax - inclusive bounds on the substring length.
// searchType              - Indicies writes every match position, Entries
//                           writes one flag per substring.
//
// File layout: a header line with the five parameters, the alphabet, the text,
// one line per substring, and finally the result. If the file cannot be opened
// an error is printed to stderr and nothing is generated.
void search(int textSize, int subN, int subSizeMin, int subSizeMax, SearchType searchType){

  ofstream myfile;
  myfile.open ("data.txt");
  // Without this check a failed open would silently discard every result below.
  if (!myfile.is_open()){
    cerr << "search: cannot open data.txt for writing" << endl;
    return;
  }

  // Generate 8bit alphabet
  int alphabetSize = 256;
  vector<int> alphabet(alphabetSize);
  for (int i = 0; i < alphabetSize; ++i){
    alphabet[i] = i;
  }

  // Generate text
  vector<int> text = generateText<int>(alphabet, textSize);

  // Generate substrings
  auto subs = generateSubs<int>(alphabet, subN, subSizeMin, subSizeMax);
  vector<int> subsSizes = getSubSizes<int>(subs);

  // Preliminary step
  map<int, vector<pair<int, int>>> dict = generateDict<int>(alphabet, subs);
  vector<vector<int>> mat = generateMatrix(subsSizes, textSize);

  // Algorithm steps
  iterate<int>(text, dict, mat);

  // Save input data
  myfile << textSize << " " << subN << " " <<  subSizeMin << " "
     << subSizeMax << " " << searchType << endl;
  printText(alphabet, myfile);
  printText(text, myfile);
  printSubs(subs, myfile);

  // Interpret results
  switch(searchType){
    case Indicies:{
      vector<pair<int, int>> indices = findIndices(mat);
      myfile << indices << endl;
      break;
    }
    case Entries:{
      vector<bool> entries = findEntries(mat);
      myfile << entries << endl;
      break;
    }
    default:
      // The value came from an unchecked integer cast; no result can be written.
      cerr << "search: unknown search type " << static_cast<int>(searchType) << endl;
      break;
  }
  myfile.close();
}

int main(int argc, char** argv){
  srand(42);

  int textSize = stoi(argv[1]);
  int subN = stoi(argv[2]);
  int subSizeMin = stoi(argv[3]);
  int subSizeMax = stoi(argv[4]);
  SearchType searchType = (SearchType)stoi(argv[5]);

  search(textSize, subN, subSizeMin, subSizeMax, searchType);

  return 0;
}

Overwriting src/cpu.cpp


In [331]:
# Compile the C++ source written to src/cpu.cpp into the executable ./cpu (with -O2).
!g++ -O2 src/cpu.cpp -o cpu

In [333]:
# Arguments, in order: textSize=100, subN=10, subSizeMin=1, subSizeMax=7,
# searchType=1 (Entries). The program prints nothing; results go to data.txt.
!./cpu 100 10 1 7 1

# C++ (GPU)