## Download libreria esterna Roaring

In [None]:
!git clone https://github.com/RoaringBitmap/CRoaring.git
!cd CRoaring && mkdir build && cd build && cmake .. && make

Cloning into 'CRoaring'...
remote: Enumerating objects: 18041, done.[K
remote: Counting objects: 100% (830/830), done.[K
remote: Compressing objects: 100% (216/216), done.[K
remote: Total 18041 (delta 756), reused 614 (delta 614), pack-reused 17211 (from 2)[K
Receiving objects: 100% (18041/18041), 50.37 MiB | 9.70 MiB/s, done.
Resolving deltas: 100% (11183/11183), done.
Updating files: 100% (1983/1983), done.
-- The CXX compiler identification is GNU 11.4.0
-- The C compiler identification is GNU 11.4.0
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /usr/bin/cc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- No build type selected, default to Release
-- CPM: Adding package cmocka@ (

Nota! \\
Bisogna spostare il file roaring.hh dalla cartella c++ alla cartella include/roaring.

# Versione con for paralleli

In [None]:
%%writefile code1.cpp
#include <iostream>
#include <vector>
#include <unordered_map>
#include <fstream>
#include <roaring/roaring.hh>
#include <omp.h>
#include <string>
#include <algorithm>
#include <regex>
#include <chrono>
#include <set>

using namespace std;
using namespace std::chrono;

using Itemset = set<string>;
using TransactionDB = vector<Itemset>;

// Parses a quoted-item text file into a transaction database and stores a binary copy.
//
// Each input line becomes one transaction (possibly empty); every substring
// enclosed in single quotes on that line is one item, stored lower-cased.
// Binary layout, per transaction: item count (raw size_t), then for each item
// its length (raw size_t) followed by its bytes. Since size_t is dumped as raw
// memory, the file is meant to be read back on the same platform.
//
// filename:        text file to parse.
// binary_filename: binary file to create (overwritten if present).
// Returns the parsed dataset. When the input cannot be opened an empty dataset
// is returned and the binary file is left untouched.
TransactionDB read_csv_and_save_binary(const string& filename, const string& binary_filename) {
    std::cout << "Lettura e Salvataggio in file binario ..." << endl;
    ifstream infile(filename);
    if (!infile) {
        // Stop before creating the output: an empty .dat would later be read back as a valid, empty dataset.
        std::cerr << "Errore: impossibile aprire il file di input " << filename << endl;
        return {};
    }

    TransactionDB dataset;
    string line;
    const regex pattern("'([^']*)'");

    // Read the text file, one transaction per line.
    while (getline(infile, line)) {
        // Build the transaction in place so the finished set is never copied.
        dataset.emplace_back();
        Itemset& transaction = dataset.back();

        for (sregex_iterator iter(line.begin(), line.end(), pattern), end; iter != end; ++iter) {
            string item = (*iter)[1].str();
            // tolower() is undefined for negative char values, so go through unsigned char.
            transform(item.begin(), item.end(), item.begin(),
                      [](unsigned char c) { return static_cast<char>(::tolower(c)); });
            transaction.insert(item);
        }
    }

    // Dump the dataset in binary form.
    ofstream outfile(binary_filename, ios::binary);
    if (!outfile) {
        std::cerr << "Errore: impossibile creare il file binario " << binary_filename << endl;
        return dataset;
    }
    for (const auto& transaction : dataset) {
        const size_t size = transaction.size();
        outfile.write(reinterpret_cast<const char*>(&size), sizeof(size));  // number of items
        for (const auto& item : transaction) {
            const size_t length = item.size();
            outfile.write(reinterpret_cast<const char*>(&length), sizeof(length));  // item length
            outfile.write(item.data(), length);  // item bytes
        }
    }
    std::cout << "File CSV letto e salvato in formato binario!" << endl;
    return dataset;
}

// Rebuilds the dataset from a binary file.
//
// Expected layout, repeated until end of file: item count (raw size_t), then
// for each item its length (raw size_t) followed by that many bytes. Items are
// lower-cased while loading.
//
// binary_filename: file to read.
// Returns the transactions read. Reading stops at the first incomplete record
// and the partially read transaction is discarded; an unreadable file yields an
// empty dataset.
TransactionDB read_binary(const string& binary_filename) {
    ifstream infile(binary_filename, ios::binary);
    TransactionDB dataset;
    if (!infile) {
        std::cerr << "Errore: impossibile aprire il file binario " << binary_filename << endl;
        return dataset;
    }

    size_t size = 0;
    bool truncated = false;
    // A failed read of the item count is the normal end-of-file condition.
    while (!truncated && infile.read(reinterpret_cast<char*>(&size), sizeof(size))) {
        dataset.emplace_back();
        Itemset& transaction = dataset.back();

        for (size_t i = 0; i < size; ++i) {
            size_t length = 0;
            // Never use `length` or the item buffer after a failed read: they would hold garbage.
            if (!infile.read(reinterpret_cast<char*>(&length), sizeof(length))) {
                truncated = true;
                break;
            }
            string item(length, '\0');
            if (length > 0 && !infile.read(&item[0], length)) {
                truncated = true;
                break;
            }
            // tolower() is undefined for negative char values, so go through unsigned char.
            transform(item.begin(), item.end(), item.begin(),
                      [](unsigned char c) { return static_cast<char>(::tolower(c)); });
            transaction.insert(item);
        }

        if (truncated) {
            dataset.pop_back();
            std::cerr << "Attenzione: file binario troncato, ultima transazione scartata" << endl;
        }
    }

    std::cout << "File binario letto!" << endl;
    return dataset;
}


// Parses a quoted-item text file into a transaction database.
//
// Each input line becomes one transaction (possibly empty); every substring
// enclosed in single quotes on that line is one item, stored lower-cased.
//
// filename: text file to parse.
// Returns the parsed dataset, or an empty dataset when the file cannot be opened.
TransactionDB read_csv(const string& filename) {
    ifstream infile(filename);
    if (!infile) {
        // Do not report a completed read for a file that was never opened.
        std::cerr << "Errore: impossibile aprire il file di input " << filename << endl;
        return {};
    }

    TransactionDB dataset;
    string line;
    const regex pattern("'([^']*)'");

    while (getline(infile, line)) {
        // Build the transaction in place so the finished set is never copied.
        dataset.emplace_back();
        Itemset& transaction = dataset.back();

        for (sregex_iterator iter(line.begin(), line.end(), pattern), end; iter != end; ++iter) {
            string item = (*iter)[1].str();
            // tolower() is undefined for negative char values, so go through unsigned char.
            transform(item.begin(), item.end(), item.begin(),
                      [](unsigned char c) { return static_cast<char>(::tolower(c)); });
            transaction.insert(item);
        }
    }
    std::cout << "Lettura file completata! \nInizio versione con Roaring BitMap e OpenMP dell'algoritmo APriori ..." << endl;
    return dataset;
}


// Returns the number of values stored in `bitmap`; callers use it as the
// support count of the itemset the bitmap belongs to.
uint64_t calculate_support(const roaring::Roaring& bitmap) {
    const uint64_t support = bitmap.cardinality();
    return support;
}


// Joins frequent k-itemsets into (k+1)-item candidates.
//
// Two itemsets are joined when their first k-1 items (in set order) are equal:
// the candidate is the first itemset plus the last item of the second one, and
// its bitmap is the intersection of the two bitmaps. No support filtering is
// done here.
//
// frequent_itemsets: frequent k-itemsets split into buckets; buckets are indexed
//                    from 0 to omp_get_max_threads()-1, so at least that many
//                    must be present.
// k:                 number of items in every input itemset.
// Returns the candidates, grouped by the thread that produced them, together
// with a flag telling whether at least one candidate was generated.
pair<vector<vector<pair<Itemset, roaring::Roaring>>>,bool> generate_candidates(const vector<vector<pair<Itemset, roaring::Roaring>>>& frequent_itemsets, int k) {
    const int numeroThread = omp_get_max_threads();

    // The result is built in place so that returning it does not copy every bitmap.
    pair<vector<vector<pair<Itemset, roaring::Roaring>>>, bool> result;
    result.first.resize(numeroThread);
    result.second = false;
    vector<vector<pair<Itemset, roaring::Roaring>>>& candidates = result.first;

    #pragma omp parallel for num_threads(omp_get_max_threads()) collapse(2)
    for (int t1 = 0; t1 < numeroThread; ++t1) {
        for (int t2 = t1; t2 < numeroThread; ++t2) {
            // Read-only views: the input buckets are never modified, so no deep copy is needed.
            const vector<pair<Itemset, roaring::Roaring>>& itemsetsT1 = frequent_itemsets[t1];
            const vector<pair<Itemset, roaring::Roaring>>& itemsetsT2 = frequent_itemsets[t2];
            // Each thread appends only to its own bucket, so no locking is required.
            vector<pair<Itemset, roaring::Roaring>>& produced = candidates[omp_get_thread_num()];

            const int n1 = itemsetsT1.size();
            const int n2 = itemsetsT2.size();

            for (int i = 0; i < n1; ++i) {
                const Itemset& itemset_i = itemsetsT1[i].first;
                // Within the same bucket, pair each itemset only with the ones after it.
                for (int j = (t1 == t2) ? i + 1 : 0; j < n2; ++j) {
                    const Itemset& itemset_j = itemsetsT2[j].first;

                    auto it1 = itemset_i.begin();
                    auto it2 = itemset_j.begin();
                    bool prefissoUguale = true;
                    for (int x = 0; x < k - 1; ++x, ++it1, ++it2) {
                        if (*it1 != *it2) {
                            prefissoUguale = false;
                            break;
                        }
                    }
                    if (!prefissoUguale) continue;

                    Itemset combined = itemset_i;
                    combined.insert(*itemset_j.rbegin());

                    produced.emplace_back(combined, itemsetsT1[i].second & itemsetsT2[j].second);
                }
            }
        }
    }

    // Computed after the parallel loop instead of writing a shared flag from
    // several threads at once, which was an unsynchronised concurrent write.
    for (const auto& bucket : candidates) {
        if (!bucket.empty()) {
            result.second = true;
            break;
        }
    }
    return result;
}


vector<vector<pair<Itemset, int>>> apriori_roaring_bitmap(TransactionDB& dataset, int minsup) {

    vector<vector<pair<Itemset, int>>> results(omp_get_max_threads());

    unordered_map<string, roaring::Roaring> item_bitmaps;
    for (size_t tid = 0; tid < dataset.size(); ++tid) {
        for (const string& item : dataset[tid]) {
            item_bitmaps[item].add(tid);
        }
    }
    vector<Itemset>().swap(dataset);

    vector<vector<pair<Itemset, roaring::Roaring>>> thread_frequent_itemsets(omp_get_max_threads());


    int i = 0;
    for (auto& it : item_bitmaps) {
        const string& item = it.first;
        roaring::Roaring& bitmap = it.second;
        int support = calculate_support(bitmap);
        if (support >= minsup) {
            thread_frequent_itemsets[i].push_back({{item}, bitmap});
            results[0].push_back({{item}, support});
            i=(i+1)%omp_get_max_threads();
        }
    }


    int k = 1;
    pair<vector<vector<pair<Itemset, roaring::Roaring>>>,bool> candidates = generate_candidates(thread_frequent_itemsets, k);

    k++;
    while (candidates.second) {
        vector<vector<pair<Itemset, roaring::Roaring>>> new_thread_frequent_itemsets(omp_get_max_threads());

            #pragma omp parallel for num_threads(omp_get_max_threads()) schedule(guided)
            for (int i = 0;i < candidates.first.size();i++) {
              if (i < candidates.first.size()){ //a causa di g++
                for (int j = 0;j < candidates.first[i].size();j++) {

                    int support = calculate_support(candidates.first[i][j].second);
                    if (support >= minsup) {
                        new_thread_frequent_itemsets[omp_get_thread_num()].push_back(candidates.first[i][j]);
                        results[omp_get_thread_num()].push_back({ candidates.first[i][j].first, support });
                    }
                }
              }
            }


        candidates = generate_candidates(new_thread_frequent_itemsets, k);
        k++;
    }
    return results;
}



// Entry point: loads the dataset, runs the parallel Apriori and writes the
// frequent itemsets to a text file.
//
// With primaLettura set, the CSV is only converted to the binary format and the
// program exits; otherwise the binary dataset is read from the working directory.
// Returns 0 on success, 1 when no transaction could be loaded or the result
// file cannot be created.
int main() {
    bool primaLettura = false;
    int minsup = 50;
    string nomeDataset = "input_50000";
    TransactionDB dataset;
    if (primaLettura) {
        dataset = read_csv_and_save_binary("C:/Users/franc/Desktop/Uni/HPC/Input/" + nomeDataset + ".csv", "C:/Users/franc/Desktop/Uni/HPC/Input/" + nomeDataset + ".dat");
        return 0;
    }

    dataset = read_binary(nomeDataset + ".dat");
    if (dataset.empty()) {
        // Nothing to mine: most likely the .dat file is missing from the working directory.
        std::cerr << "Errore: nessuna transazione letta da " << nomeDataset << ".dat" << endl;
        return 1;
    }

    omp_set_num_threads(26);
    std::cout << "Inizio esecuzione con: " << omp_get_max_threads() << " Threads" << endl;
    auto start = steady_clock::now();
    vector<vector<pair<Itemset,int>>> results = apriori_roaring_bitmap(dataset, minsup);
    auto stop = steady_clock::now();

    std::chrono::duration<double> duration = stop - start;

    const string nomeOutput = "roaringBitMapResult_" + nomeDataset + ".txt";
    ofstream outfile(nomeOutput);
    if (!outfile) {
        std::cerr << "Errore: impossibile creare il file " << nomeOutput << endl;
        return 1;
    }
    for (const auto& thread : results) {
        for (const auto& result : thread) {
            outfile << "Frequent itemset: ";
            for (const auto& item : result.first) {
                outfile << item << " ";
            }
            // '\n' instead of endl: no need to flush the file after every line.
            outfile << "Support: " << result.second << '\n';
        }
    }
    outfile.close();
    // Report the name of the file that was actually written.
    std::cout << "Analisi completata, frequent itemsets presenti all'interno del file " << nomeOutput << endl;
    std::cout << "Tempo di esecuzione APriori con Roaring BitMap e OpenMP: " << duration.count() << " s" << endl;
    return 0;
}


Overwriting code1.cpp


In [None]:
!g++ -fopenmp -o run_me code1.cpp -I CRoaring/include CRoaring/build/src/libroaring.a
!./run_me


File binario letto!
Inizio esecuzione con: 26 Threads
Analisi completata, frequent itemsets presenti all'interno del file roaringBitMapResult.txt 
Tempo di esecuzione APriori con Roaring BitMap e OpenMP: 0.722766 s


# Versione con creazione unica dei thread, ma scambio informazioni tra gli stessi

In [None]:
%%writefile code2.cpp
#include <iostream>
#include <vector>
#include <unordered_map>
#include <fstream>
#include <roaring/roaring.hh>
#include <omp.h>
#include <string>
#include <algorithm>
#include <regex>
#include <chrono>
#include <set>

using namespace std;
using namespace std::chrono;

using Itemset = set<string>;
using TransactionDB = vector<Itemset>;

// Reads a text file of quoted items, returns it as a transaction database and
// also saves it in binary form.
//
// One transaction per input line (an empty one if the line has no quoted text);
// each '...' substring is an item and is stored lower-cased.
// Binary layout, per transaction: item count (raw size_t), then for each item
// its length (raw size_t) and its bytes. The raw size_t dump ties the file to
// the platform that wrote it.
//
// filename:        text file to parse.
// binary_filename: binary file to create (overwritten if present).
// Returns the parsed dataset. If the input cannot be opened the result is empty
// and no binary file is written.
TransactionDB read_csv_and_save_binary(const string& filename, const string& binary_filename) {
    std::cout << "Lettura e Salvataggio in file binario ..." << endl;
    ifstream infile(filename);
    if (!infile) {
        // Without this check an empty .dat would be produced and later loaded as a valid dataset.
        std::cerr << "Errore: impossibile aprire il file di input " << filename << endl;
        return {};
    }

    TransactionDB dataset;
    string line;
    const regex pattern("'([^']*)'");

    // Parse the text file line by line.
    while (getline(infile, line)) {
        // The transaction is filled directly inside the dataset to avoid a copy.
        dataset.emplace_back();
        Itemset& transaction = dataset.back();

        for (sregex_iterator iter(line.begin(), line.end(), pattern), end; iter != end; ++iter) {
            string item = (*iter)[1].str();
            // Passing a negative char to tolower() is undefined; convert via unsigned char.
            transform(item.begin(), item.end(), item.begin(),
                      [](unsigned char c) { return static_cast<char>(::tolower(c)); });
            transaction.insert(item);
        }
    }

    // Save the dataset in binary form.
    ofstream outfile(binary_filename, ios::binary);
    if (!outfile) {
        std::cerr << "Errore: impossibile creare il file binario " << binary_filename << endl;
        return dataset;
    }
    for (const auto& transaction : dataset) {
        const size_t size = transaction.size();
        outfile.write(reinterpret_cast<const char*>(&size), sizeof(size));  // number of items
        for (const auto& item : transaction) {
            const size_t length = item.size();
            outfile.write(reinterpret_cast<const char*>(&length), sizeof(length));  // item length
            outfile.write(item.data(), length);  // item bytes
        }
    }
    std::cout << "File CSV letto e salvato in formato binario!" << endl;
    return dataset;
}

// Reads the binary file and rebuilds the dataset.
//
// Expected layout, repeated until end of file: item count (raw size_t), then
// for each item its length (raw size_t) followed by that many bytes. Items are
// lower-cased while loading.
//
// binary_filename: file to read.
// Returns the transactions read. A record cut short by the end of the file
// stops the loading and is dropped; an unreadable file yields an empty dataset.
TransactionDB read_binary(const string& binary_filename) {
    ifstream infile(binary_filename, ios::binary);
    TransactionDB dataset;
    if (!infile) {
        std::cerr << "Errore: impossibile aprire il file binario " << binary_filename << endl;
        return dataset;
    }

    size_t size = 0;
    bool truncated = false;
    // Failing to read the next item count is the regular end-of-file exit.
    while (!truncated && infile.read(reinterpret_cast<char*>(&size), sizeof(size))) {
        dataset.emplace_back();
        Itemset& transaction = dataset.back();

        for (size_t i = 0; i < size; ++i) {
            size_t length = 0;
            // After a failed read neither `length` nor the buffer hold valid data: stop here.
            if (!infile.read(reinterpret_cast<char*>(&length), sizeof(length))) {
                truncated = true;
                break;
            }
            string item(length, '\0');
            if (length > 0 && !infile.read(&item[0], length)) {
                truncated = true;
                break;
            }
            // Passing a negative char to tolower() is undefined; convert via unsigned char.
            transform(item.begin(), item.end(), item.begin(),
                      [](unsigned char c) { return static_cast<char>(::tolower(c)); });
            transaction.insert(item);
        }

        if (truncated) {
            dataset.pop_back();
            std::cerr << "Attenzione: file binario troncato, ultima transazione scartata" << endl;
        }
    }

    std::cout << "File binario letto!" << endl;
    return dataset;
}


// Reads a text file of quoted items into a transaction database.
//
// One transaction per input line (an empty one if the line has no quoted text);
// each '...' substring is an item and is stored lower-cased.
//
// filename: text file to parse.
// Returns the parsed dataset; empty when the file cannot be opened.
TransactionDB read_csv(const string& filename) {
    ifstream infile(filename);
    if (!infile) {
        // A file that was never opened must not be reported as read.
        std::cerr << "Errore: impossibile aprire il file di input " << filename << endl;
        return {};
    }

    TransactionDB dataset;
    string line;
    const regex pattern("'([^']*)'");

    while (getline(infile, line)) {
        // The transaction is filled directly inside the dataset to avoid a copy.
        dataset.emplace_back();
        Itemset& transaction = dataset.back();

        for (sregex_iterator iter(line.begin(), line.end(), pattern), end; iter != end; ++iter) {
            string item = (*iter)[1].str();
            // Passing a negative char to tolower() is undefined; convert via unsigned char.
            transform(item.begin(), item.end(), item.begin(),
                      [](unsigned char c) { return static_cast<char>(::tolower(c)); });
            transaction.insert(item);
        }
    }
    std::cout << "Lettura file completata! \nInizio versione con Roaring BitMap e OpenMP dell'algoritmo APriori ..." << endl;
    return dataset;
}


// Support helper: the result is the cardinality of `bitmap`, i.e. how many
// values it currently contains.
uint64_t calculate_support(const roaring::Roaring& bitmap) {
    return static_cast<uint64_t>(bitmap.cardinality());
}


vector<vector<pair<Itemset, int>>> apriori_roaring_bitmap(TransactionDB& dataset, int minsup) {

    vector<vector<pair<Itemset, int>>> results(omp_get_max_threads());  // Risultati separati per thread

    unordered_map<string, roaring::Roaring> item_bitmaps;
    for (size_t tid = 0; tid < dataset.size(); ++tid) {
        for (const string& item : dataset[tid]) {
            item_bitmaps[item].add(tid);
        }
    }
    vector<Itemset>().swap(dataset);

    vector<pair<Itemset, roaring::Roaring>> candidates;

    for (auto& it : item_bitmaps) {
        const string& item = it.first;
        roaring::Roaring& bitmap = it.second;
        int support = calculate_support(bitmap);
        if (support >= minsup) {
            candidates.push_back({ {item}, bitmap });
        }
    }

    vector<pair<Itemset, roaring::Roaring>> global_frequent_itemsets;
    #pragma omp parallel num_threads(omp_get_max_threads())
    {
        int k = 1;
        while (!candidates.empty()) {

            int CHUNK_SIZE = (int)(candidates.size() / omp_get_max_threads());
            int start = omp_get_thread_num() * CHUNK_SIZE;
            int end = start + CHUNK_SIZE;
            if (omp_get_thread_num() == omp_get_max_threads() - 1) {
                end = candidates.size();
                CHUNK_SIZE = end - start;
            }

            vector<pair<Itemset, roaring::Roaring>> local_frequent_itemsets;


            for (int i = start;i < end;i++) {
                int support = calculate_support(candidates[i].second);
                if (support >= minsup) {
                    local_frequent_itemsets.push_back(candidates[i]);
                    results[omp_get_thread_num()].push_back({ candidates[i].first, support });
                }
            }

            #pragma omp barrier
            #pragma omp master
            {
                global_frequent_itemsets.clear();
            }
            #pragma omp barrier
            #pragma omp critical
            {
                global_frequent_itemsets.insert(global_frequent_itemsets.end(), std::make_move_iterator(local_frequent_itemsets.begin()), std::make_move_iterator(local_frequent_itemsets.end()));;
            }
            #pragma omp barrier

            start = omp_get_thread_num();
            end = global_frequent_itemsets.size();
            int step = omp_get_max_threads();

            vector<pair<Itemset, roaring::Roaring>> local_candidates;

            for (int i = start; i < end; i+=step) {

                auto& itemset_i = global_frequent_itemsets[i].first;
                auto& roaring_i = global_frequent_itemsets[i].second;
                for (int j = i + 1; j < end; ++j) {
                    auto& itemset_j = global_frequent_itemsets[j].first;
                    auto& roaring_j = global_frequent_itemsets[j].second;


                    auto it1 = itemset_i.begin();
                    auto it2 = itemset_j.begin();
                    bool prefissoUguale = true;
                    for (int x = 0; x < k - 1; ++x, ++it1, ++it2) {
                        if (*it1 != *it2) {
                            prefissoUguale = false;
                            break;
                        }
                    }


                    if (!prefissoUguale) continue;

                    Itemset combined = itemset_i;
                    combined.insert(*itemset_j.rbegin());

                    roaring::Roaring combined_bitmap = roaring_i & roaring_j;
                    local_candidates.emplace_back(combined, combined_bitmap);
                }
            }

            #pragma omp barrier
            #pragma omp master
            {
                candidates.clear();
            }
            #pragma omp barrier
            #pragma omp critical
            {
                candidates.insert(candidates.end(), std::make_move_iterator(local_candidates.begin()), std::make_move_iterator(local_candidates.end()));;
            }
            #pragma omp barrier
            k++;
        }
    }
    return results;

}

// Entry point: loads the dataset, runs the parallel Apriori and stores the
// frequent itemsets in a text file.
//
// When primaLettura is set the program only converts the CSV to the binary
// format and exits; otherwise it reads the binary dataset from the working
// directory.
// Returns 0 on success, 1 when no transaction could be loaded or the result
// file cannot be created.
int main() {
    bool primaLettura = false;
    int minsup = 50;
    string nomeDataset = "input_50000";
    TransactionDB dataset;
    if (primaLettura) {
        dataset = read_csv_and_save_binary("C:/Users/franc/Desktop/Uni/HPC/Input/" + nomeDataset + ".csv", "C:/Users/franc/Desktop/Uni/HPC/Input/" + nomeDataset + ".dat");
        return 0;
    }

    dataset = read_binary( nomeDataset + ".dat");
    if (dataset.empty()) {
        // Nothing to mine: most likely the .dat file is missing from the working directory.
        std::cerr << "Errore: nessuna transazione letta da " << nomeDataset << ".dat" << endl;
        return 1;
    }

    omp_set_num_threads(2);
    std::cout << "Inizio esecuzione con: " << omp_get_max_threads() << " Threads" << endl;
    auto start = steady_clock::now();
    vector<vector<pair<Itemset,int>>> results = apriori_roaring_bitmap(dataset, minsup);
    auto stop = steady_clock::now();

    std::chrono::duration<double> duration = stop - start;

    const string nomeOutput = "roaringBitMapResult_" + nomeDataset + ".txt";
    ofstream outfile(nomeOutput);
    if (!outfile) {
        std::cerr << "Errore: impossibile creare il file " << nomeOutput << endl;
        return 1;
    }
    for (const auto& thread : results) {
        for (const auto& result : thread) {
            outfile << "Frequent itemset: ";
            for (const auto& item : result.first) {
                outfile << item << " ";
            }
            // '\n' instead of endl: flushing after every line is unnecessary.
            outfile << "Support: " << result.second << '\n';
        }
    }
    outfile.close();
    // Print the name of the file that was actually written.
    std::cout << "Analisi completata, frequent itemsets presenti all'interno del file " << nomeOutput << endl;
    std::cout << "Tempo di esecuzione APriori con Roaring BitMap e OpenMP: " << duration.count() << " s" << endl;
    return 0;
}


Overwriting code2.cpp


In [None]:
!g++ -fopenmp -o run_me code2.cpp -I CRoaring/include CRoaring/build/src/libroaring.a
!./run_me

File binario letto!
Inizio esecuzione con: 2 Threads
Analisi completata, frequent itemsets presenti all'interno del file roaringBitMapResult.txt 
Tempo di esecuzione APriori con Roaring BitMap e OpenMP: 0.52233 s


# Versione con creazione unica dei thread, ma con barriere di sincronizzazione


In [None]:
%%writefile code3.cpp
#include <iostream>
#include <vector>
#include <unordered_map>
#include <fstream>
#include <roaring/roaring.hh>
#include <omp.h>
#include <string>
#include <algorithm>
#include <regex>
#include <chrono>
#include <set>
#include <map>

using namespace std;
using namespace std::chrono;

using Itemset = set<string>;
using TransactionDB = vector<Itemset>;

// Converts a text file of quoted items into a transaction database and writes
// a binary copy of it.
//
// The first line of the input is discarded; every following line becomes one
// transaction (possibly empty). Each '...' substring is an item, stored
// lower-cased.
// Binary layout, per transaction: item count (raw size_t), then for each item
// its length (raw size_t) and its bytes. The raw size_t dump ties the file to
// the platform that wrote it.
//
// filename:        text file to parse.
// binary_filename: binary file to create (overwritten if present).
// Returns the parsed dataset. If the input cannot be opened the result is empty
// and no binary file is written.
TransactionDB read_csv_and_save_binary(const string& filename, const string& binary_filename) {
    cout << "Lettura e Salvataggio in file binario ..." << endl;
    ifstream infile(filename);
    if (!infile) {
        // Without this check an empty .dat would be produced and later loaded as a valid dataset.
        cerr << "Errore: impossibile aprire il file di input " << filename << endl;
        return {};
    }

    TransactionDB dataset;
    string line;
    const regex pattern("'([^']*)'");

    // Skip the first line, then parse the rest one transaction per line.
    getline(infile, line);
    while (getline(infile, line)) {
        // The transaction is filled directly inside the dataset to avoid a copy.
        dataset.emplace_back();
        Itemset& transaction = dataset.back();

        for (sregex_iterator iter(line.begin(), line.end(), pattern), end; iter != end; ++iter) {
            string item = (*iter)[1].str();
            // Passing a negative char to tolower() is undefined; convert via unsigned char.
            transform(item.begin(), item.end(), item.begin(),
                      [](unsigned char c) { return static_cast<char>(::tolower(c)); });
            transaction.insert(item);
        }
    }

    // Save the dataset in binary form.
    ofstream outfile(binary_filename, ios::binary);
    if (!outfile) {
        cerr << "Errore: impossibile creare il file binario " << binary_filename << endl;
        return dataset;
    }
    for (const auto& transaction : dataset) {
        const size_t size = transaction.size();
        outfile.write(reinterpret_cast<const char*>(&size), sizeof(size));  // number of items
        for (const auto& item : transaction) {
            const size_t length = item.size();
            outfile.write(reinterpret_cast<const char*>(&length), sizeof(length));  // item length
            outfile.write(item.data(), length);  // item bytes
        }
    }
    cout << "File CSV letto e salvato in formato binario!" << endl;
    return dataset;
}

// Reads the binary file and rebuilds the dataset.
//
// Expected layout, repeated until end of file: item count (raw size_t), then
// for each item its length (raw size_t) followed by that many bytes. Items are
// lower-cased while loading.
//
// binary_filename: file to read.
// Returns the transactions read. A record cut short by the end of the file
// stops the loading and is dropped; an unreadable file yields an empty dataset.
TransactionDB read_binary(const string& binary_filename) {
    ifstream infile(binary_filename, ios::binary);
    TransactionDB dataset;
    if (!infile) {
        cerr << "Errore: impossibile aprire il file binario " << binary_filename << endl;
        return dataset;
    }

    size_t size = 0;
    bool truncated = false;
    // Failing to read the next item count is the regular end-of-file exit.
    while (!truncated && infile.read(reinterpret_cast<char*>(&size), sizeof(size))) {
        dataset.emplace_back();
        Itemset& transaction = dataset.back();

        for (size_t i = 0; i < size; ++i) {
            size_t length = 0;
            // After a failed read neither `length` nor the buffer hold valid data: stop here.
            if (!infile.read(reinterpret_cast<char*>(&length), sizeof(length))) {
                truncated = true;
                break;
            }
            string item(length, '\0');
            if (length > 0 && !infile.read(&item[0], length)) {
                truncated = true;
                break;
            }
            // Passing a negative char to tolower() is undefined; convert via unsigned char.
            transform(item.begin(), item.end(), item.begin(),
                      [](unsigned char c) { return static_cast<char>(::tolower(c)); });
            transaction.insert(item);
        }

        if (truncated) {
            dataset.pop_back();
            cerr << "Attenzione: file binario troncato, ultima transazione scartata" << endl;
        }
    }

    cout << "File binario letto!" << endl;
    return dataset;
}

// Reads a text file of quoted items into a transaction database.
//
// One transaction per input line (an empty one if the line has no quoted text);
// each '...' substring is an item and is stored lower-cased. Unlike
// read_csv_and_save_binary() in this file, the first line is not skipped.
//
// filename: text file to parse.
// Returns the parsed dataset; empty when the file cannot be opened.
TransactionDB read_csv(const string& filename) {
    ifstream infile(filename);
    if (!infile) {
        // A file that was never opened must not be reported as read.
        cerr << "Errore: impossibile aprire il file di input " << filename << endl;
        return {};
    }

    TransactionDB dataset;
    string line;
    const regex pattern("'([^']*)'");

    while (getline(infile, line)) {
        // The transaction is filled directly inside the dataset to avoid a copy.
        dataset.emplace_back();
        Itemset& transaction = dataset.back();

        for (sregex_iterator iter(line.begin(), line.end(), pattern), end; iter != end; ++iter) {
            string item = (*iter)[1].str();
            // Passing a negative char to tolower() is undefined; convert via unsigned char.
            transform(item.begin(), item.end(), item.begin(),
                      [](unsigned char c) { return static_cast<char>(::tolower(c)); });
            transaction.insert(item);
        }
    }
    cout << "Lettura file completata! \nInizio versione con Roaring BitMap e OpenMP dell'algoritmo APriori ..." << endl;
    return dataset;
}


vector<vector<pair<Itemset, int>>> apriori_roaring_bitmap(TransactionDB& dataset, int minsup, int numThreads) {
    vector<vector<pair<Itemset, int>>> results(numThreads);

    unordered_map<string, roaring::Roaring> item_bitmaps;
    for (int tid = 0; tid < dataset.size(); ++tid) {
        for (const string& item : dataset[tid]) {
            item_bitmaps[item].add(tid);
        }
    }

    alignas(64) vector<unique_ptr<vector<pair<Itemset, roaring::Roaring>>>> candidates;
    bool candidates_second = true;
    candidates.resize(numThreads);
    for (int i = 0; i < numThreads; ++i) {
        candidates[i] = make_unique<vector<pair<Itemset, roaring::Roaring>>>();
    }

    int i = 0;
    for (auto& it : item_bitmaps) {
        const string& item = it.first;
        roaring::Roaring& bitmap = it.second;
        char letteraIniziale = item[0];
        candidates[i]->push_back({ {item}, bitmap });
        i = (i + 1) % numThreads;
    }

    vector<unique_ptr<vector<pair<Itemset, roaring::Roaring>>>> new_thread_frequent_itemsets(numThreads);

    #pragma omp parallel num_threads(numThreads)
    {

        int k = 1;
        int tid = omp_get_thread_num();
        new_thread_frequent_itemsets[tid] = make_unique<vector<pair<Itemset, roaring::Roaring>>>();
        while (candidates_second) {
            for (int t = 0;t < candidates[tid]->size();t++) {
                int support = candidates[tid]->at(t).second.cardinality();
                if (support >= minsup) {
                    new_thread_frequent_itemsets[tid]->push_back(candidates[tid]->at(t));
                    results[tid].push_back({ candidates[tid]->at(t).first, support });
                }
            }

            #pragma omp barrier

            #pragma omp single nowait
            {
                candidates_second = false;
            }

            candidates[tid]->clear();
            int numeroThread = numThreads;
            bool inserimento = false;

            vector<pair<Itemset, roaring::Roaring>> itemsetsTCorr = *(new_thread_frequent_itemsets[tid]);
            int n1 = itemsetsTCorr.size();

            for (int t2 = tid;t2 < numeroThread; ++t2) {
                    vector<pair<Itemset, roaring::Roaring>> itemsetsT2 = *(new_thread_frequent_itemsets[t2]);
                    int n2 = itemsetsT2.size();

                    for (int i = 0; i < n1; ++i) {
                        int j;
                        if (tid == t2)
                            j = i + 1;
                        else
                            j = 0;
                        auto& itemset_i = itemsetsTCorr[i].first;
                        for (j; j < n2; ++j) {
                            auto& itemset_j = itemsetsT2[j].first;

                            auto it1 = itemset_i.begin();
                            auto it2 = itemset_j.begin();
                            bool prefissoUguale = true;

                            for (int x = 0; x < k - 1; ++x) {
                                if (*it1 != *it2) {
                                    prefissoUguale = false;
                                    break;
                                }
                                ++it1;
                                ++it2;
                            }

                            if (!prefissoUguale) continue;

                            Itemset combined = itemset_i;
                            combined.insert(*itemset_j.rbegin());

                            roaring::Roaring combined_bitmap = itemsetsTCorr[i].second & itemsetsT2[j].second;

                            candidates[tid]->emplace_back(combined, combined_bitmap);
                            if (!inserimento) {
                                inserimento = true;
                            }
                        }
                    }
                }

            if (inserimento)
                candidates_second = true;
           #pragma omp barrier
            new_thread_frequent_itemsets[tid]->clear();
            k++;
    }
}
    return results;
}


// Entry point: loads the dataset, runs the parallel Apriori with a fixed number
// of threads and stores the frequent itemsets in a text file.
//
// When primaLettura is set the program only converts the CSV to the binary
// format, prints the number of transactions and exits; otherwise it reads the
// binary dataset from the working directory.
// Returns 0 on success, 1 when no transaction could be loaded or the result
// file cannot be created.
int main() {
    bool primaLettura = false;
    int minsup = 50;
    string nomeDataset = "input_50000";
    TransactionDB dataset;
    if (primaLettura) {
        dataset = read_csv_and_save_binary("C:/Users/franc/Desktop/Uni/HPC/Input/" + nomeDataset + ".csv", "C:/Users/franc/Desktop/Uni/HPC/Input/" + nomeDataset + ".dat");
        cout << dataset.size() << endl;
        return 0;
    }

    dataset = read_binary(nomeDataset + ".dat");

    cout << dataset.size() << endl;
    if (dataset.empty()) {
        // Nothing to mine: most likely the .dat file is missing from the working directory.
        cerr << "Errore: nessuna transazione letta da " << nomeDataset << ".dat" << endl;
        return 1;
    }

    int numThreads = 26;
    cout << "Inizio esecuzione con: " << numThreads << " Threads" << endl;
    auto start = steady_clock::now();
    vector<vector<pair<Itemset, int>>> results = apriori_roaring_bitmap(dataset, minsup,numThreads);
    auto stop = steady_clock::now();

    std::chrono::duration<double> duration = stop - start;
    cout << "Tempo di esecuzione APriori con Roaring BitMap e OpenMP con  "<< numThreads << " =" << duration.count() << " s" << endl;

    const string nomeOutput = "roaringBitMapResult_" + nomeDataset + ".txt";
    ofstream outfile(nomeOutput);
    if (!outfile) {
        cerr << "Errore: impossibile creare il file " << nomeOutput << endl;
        return 1;
    }
    for (const auto& thread : results) {
        for (const auto& result : thread) {
            outfile << "Frequent itemset: ";
            for (const auto& item : result.first) {
                outfile << item << " ";
            }
            // '\n' instead of endl: flushing after every line is unnecessary.
            outfile << "Support: " << result.second << '\n';
        }
    }
    outfile.close();
    // Print the name of the file that was actually written.
    cout << "Analisi completata, frequent itemsets presenti all'interno del file " << nomeOutput << endl;
    return 0;
}


Overwriting code3.cpp


In [None]:
!g++ -fopenmp -o run_me code3.cpp -I CRoaring/include CRoaring/build/src/libroaring.a
!./run_me

File binario letto!
50001
Inizio esecuzione con: 26 Threads
Tempo di esecuzione APriori con Roaring BitMap e OpenMP con  26 =0.526521 s
Analisi completata, frequent itemsets presenti all'interno del file roaringBitMapResult.txt 


# Versione con creazione unica dei thread e minimizzazione della sincronizzazione

In [None]:
%%writefile code4.cpp
#include <iostream>
#include <vector>
#include <unordered_map>
#include <fstream>
#include <roaring/roaring.hh>
#include <omp.h>
#include <string>
#include <algorithm>
#include <regex>
#include <chrono>
#include <set>
#include <map>

using namespace std;
using namespace std::chrono;

using Itemset = set<string>;
using TransactionDB = vector<Itemset>;

// Converts a text file of quoted items into a transaction database and writes
// a binary copy of it. Nothing is printed on success.
//
// The first line of the input is discarded; every following line becomes one
// transaction (possibly empty). Each '...' substring is an item, stored
// lower-cased.
// Binary layout, per transaction: item count (raw size_t), then for each item
// its length (raw size_t) and its bytes. The raw size_t dump ties the file to
// the platform that wrote it.
//
// filename:        text file to parse.
// binary_filename: binary file to create (overwritten if present).
// Returns the parsed dataset. If the input cannot be opened the result is empty
// and no binary file is written.
TransactionDB read_csv_and_save_binary(const string& filename, const string& binary_filename) {
    ifstream infile(filename);
    if (!infile) {
        // Without this check an empty .dat would be produced and later loaded as a valid dataset.
        cerr << "Errore: impossibile aprire il file di input " << filename << endl;
        return {};
    }

    TransactionDB dataset;
    string line;
    const regex pattern("'([^']*)'");

    // Skip the first line, then parse the rest one transaction per line.
    getline(infile, line);
    while (getline(infile, line)) {
        // The transaction is filled directly inside the dataset to avoid a copy.
        dataset.emplace_back();
        Itemset& transaction = dataset.back();

        for (sregex_iterator iter(line.begin(), line.end(), pattern), end; iter != end; ++iter) {
            string item = (*iter)[1].str();
            // Passing a negative char to tolower() is undefined; convert via unsigned char.
            transform(item.begin(), item.end(), item.begin(),
                      [](unsigned char c) { return static_cast<char>(::tolower(c)); });
            transaction.insert(item);
        }
    }

    // Save the dataset in binary form.
    ofstream outfile(binary_filename, ios::binary);
    if (!outfile) {
        cerr << "Errore: impossibile creare il file binario " << binary_filename << endl;
        return dataset;
    }
    for (const auto& transaction : dataset) {
        const size_t size = transaction.size();
        outfile.write(reinterpret_cast<const char*>(&size), sizeof(size));  // number of items
        for (const auto& item : transaction) {
            const size_t length = item.size();
            outfile.write(reinterpret_cast<const char*>(&length), sizeof(length));  // item length
            outfile.write(item.data(), length);  // item bytes
        }
    }
    return dataset;
}

// Reads a binary transaction file and rebuilds the dataset.
//
// Expected layout (native size_t width and byte order): for each transaction
// the item count, then for each item its byte length followed by the raw bytes.
// Items are lowercased while loading.
//
// Returns the transactions that could be read completely. A missing file yields
// an empty dataset; a truncated or corrupted file yields the transactions that
// precede the damaged record. Both conditions are reported on stderr.
TransactionDB read_binary(const string& binary_filename) {
    TransactionDB dataset;

    ifstream infile(binary_filename, ios::binary);
    if (!infile) {
        cerr << "Errore: impossibile aprire il file binario '" << binary_filename << "'" << endl;
        return dataset;
    }

    // No stored string can be longer than the file itself; this bound prevents a
    // corrupted length field from triggering a gigantic allocation.
    infile.seekg(0, ios::end);
    const streamoff endPos = infile.tellg();
    infile.seekg(0, ios::beg);
    const size_t maxLength = endPos < 0 ? string().max_size() : static_cast<size_t>(endPos);

    // Reads one size_t field; false when the stream is exhausted or broken.
    const auto read_size = [&infile](size_t& value) {
        return static_cast<bool>(infile.read(reinterpret_cast<char*>(&value), sizeof(value)));
    };
    // std::tolower has undefined behaviour for negative char values, so go through unsigned char.
    const auto to_lower = [](unsigned char c) { return static_cast<char>(std::tolower(c)); };

    size_t itemCount = 0;
    while (read_size(itemCount)) {  // a failed read here is the regular end of file
        Itemset transaction;
        bool complete = true;

        for (size_t i = 0; i < itemCount; ++i) {
            size_t length = 0;
            if (!read_size(length) || length > maxLength) {
                complete = false;
                break;
            }
            string item(length, '\0');
            if (length > 0 && !infile.read(&item[0], static_cast<streamsize>(length))) {
                complete = false;
                break;
            }
            transform(item.begin(), item.end(), item.begin(), to_lower);
            transaction.insert(std::move(item));
        }

        if (!complete) {
            // Never hand a half-read transaction to the caller.
            cerr << "Attenzione: file binario '" << binary_filename
                 << "' troncato o corrotto, lettura interrotta" << endl;
            break;
        }
        dataset.push_back(std::move(transaction));
    }

    return dataset;
}

// Parses a CSV file of transactions without writing any binary cache.
//
// Every line (including the first one: no header is skipped here) produces one
// transaction; each single-quoted token ('...') on the line becomes one
// lowercase item, and anything outside the quotes is ignored.
//
// Returns the parsed dataset, or an empty dataset (with a message on stderr)
// when the file cannot be opened.
TransactionDB read_csv(const string& filename) {
    TransactionDB dataset;

    ifstream infile(filename);
    if (!infile) {
        cerr << "Errore: impossibile aprire il file '" << filename << "'" << endl;
        return dataset;
    }

    const regex pattern("'([^']*)'");
    // std::tolower has undefined behaviour for negative char values, so go through unsigned char.
    const auto to_lower = [](unsigned char c) { return static_cast<char>(std::tolower(c)); };

    string line;
    while (getline(infile, line)) {
        Itemset transaction;
        for (sregex_iterator it(line.begin(), line.end(), pattern), last; it != last; ++it) {
            string item = (*it)[1].str();
            transform(item.begin(), item.end(), item.begin(), to_lower);
            transaction.insert(std::move(item));
        }
        dataset.push_back(std::move(transaction));
    }
    return dataset;
}


vector<vector<pair<Itemset, int>>> apriori_roaring_bitmap(TransactionDB& dataset, int minsup,int numThreads) {
    vector<vector<pair<Itemset, int>>> results(numThreads);

    std::map<string,roaring::Roaring> item_bitmaps;
    for (int tid = 0; tid < dataset.size(); ++tid) {
        for (const string& item : dataset[tid]) {
            item_bitmaps[item].add(tid);
        }
    }

    alignas(64) vector<unique_ptr<vector<pair<Itemset, roaring::Roaring>>>> candidates;
    bool candidates_second = true;
    candidates.resize(numThreads);
    for (int i = 0; i < numThreads; ++i) {
        candidates[i] = make_unique<vector<pair<Itemset, roaring::Roaring>>>();
    }

    int lettereAlfabeto = 26;


        int letterePerThread =  int(lettereAlfabeto / numThreads);
        int i = 0;
        int contatore = 0;
        char iniziale = 'a';
        for (auto& it : item_bitmaps) {
            const string& item = it.first;
            roaring::Roaring& bitmap = it.second;
            char letteraIniziale = item[0];
            if (letteraIniziale == iniziale)
                candidates[i]->push_back({ {item}, bitmap });
            else
            {
                iniziale = letteraIniziale;
                contatore++;

                if (contatore == letterePerThread && i != numThreads - 1) {
                    i = (i + 1);
                    contatore = 0;
                }
                candidates[i]->push_back({ {item}, bitmap });
            }
    }

    vector<unique_ptr<vector<pair<Itemset, roaring::Roaring>>>> new_thread_frequent_itemsets(numThreads);

    #pragma omp parallel num_threads(numThreads)
    {

        bool primaIterazione = true;
        int k = 1;
        int tid = omp_get_thread_num();
        new_thread_frequent_itemsets[tid] = make_unique<vector<pair<Itemset, roaring::Roaring>>>();
        bool inserimento = true;

        while (inserimento) {
            for (int t = 0;t < candidates[tid]->size();t++) {
                int support = candidates[tid]->at(t).second.cardinality();
                if (support >= minsup) {
                    new_thread_frequent_itemsets[tid]->push_back(candidates[tid]->at(t));
                    results[tid].push_back({ candidates[tid]->at(t).first, support });
                }
            }

             if (primaIterazione)
             {
                #pragma omp barrier
             }

            candidates[tid]->clear();
            int numeroThread = numThreads;
            inserimento = false;

            vector<pair<Itemset, roaring::Roaring>> itemsetsTCorr = *(new_thread_frequent_itemsets[tid]);
            int n1 = itemsetsTCorr.size();

            if (primaIterazione) {
                for (int t2 = tid;t2 < numeroThread; ++t2) {
                    vector<pair<Itemset, roaring::Roaring>> itemsetsT2 = *(new_thread_frequent_itemsets[t2]);

                    int n2 = itemsetsT2.size();

                    for (int i = 0; i < n1; ++i) {
                        int j;
                        if (tid == t2)
                            j = i + 1;
                        else
                            j = 0;
                        auto& itemset_i = itemsetsTCorr[i].first;
                        for (j; j < n2; ++j) {
                            auto& itemset_j = itemsetsT2[j].first;

                            auto it1 = itemset_i.begin();
                            auto it2 = itemset_j.begin();
                            bool prefissoUguale = true;

                            //#pragma omp parallel for
                            for (int x = 0; x < k - 1; ++x) {
                                if (*it1 != *it2) {
                                    prefissoUguale = false;
                                    break;
                                }
                                ++it1;
                                ++it2;
                            }

                            if (!prefissoUguale) continue;

                            Itemset combined = itemset_i;
                            combined.insert(*itemset_j.rbegin());

                            roaring::Roaring combined_bitmap = itemsetsTCorr[i].second & itemsetsT2[j].second;

                            candidates[tid]->emplace_back(combined, combined_bitmap);
                            if (!inserimento) {
                                inserimento = true;
                            }
                        }
                    }
                }
                if (primaIterazione) {
                    #pragma omp barrier
                    primaIterazione = false;
                }

            }
            else {
                for (int i = 0; i < n1; ++i) {
                    auto& itemset_i = itemsetsTCorr[i].first;
                    for (int j=i+1; j < n1; ++j) {
                        auto& itemset_j = itemsetsTCorr[j].first;

                        auto it1 = itemset_i.begin();
                        auto it2 = itemset_j.begin();
                        bool prefissoUguale = true;

                        for (int x = 0; x < k - 1; ++x) {
                            if (*it1 != *it2) {
                                prefissoUguale = false;
                                break;
                            }
                            ++it1;
                            ++it2;
                        }


                        if (!prefissoUguale) continue;

                        Itemset combined = itemset_i;
                        combined.insert(*itemset_j.rbegin());

                        roaring::Roaring combined_bitmap = itemsetsTCorr[i].second & itemsetsTCorr[j].second;

                        candidates[tid]->emplace_back(combined, combined_bitmap);
                        if (!inserimento) {
                            inserimento = true;
                        }
                    }
                }
            }
            new_thread_frequent_itemsets[tid]->clear();
                k++;
            }
    }

    return results;
}


// Entry point: loads the dataset, times the parallel frequent-itemset search and
// writes every frequent itemset with its support to a text file.
//
// Returns 0 on success and 1 when the result file cannot be created.
int main() {
    const bool primaLettura = false;  // true: only convert the CSV into the binary cache, then exit
    const int minsup = 50;
    const int numThreads = 26;
    const string nomeDataset = "input_50000";

    if (primaLettura) {
        // NOTE(review): the binary file is written into this absolute directory,
        // while the branch below reads it from the current working directory.
        // Confirm the intended location before changing either path.
        read_csv_and_save_binary("C:/Users/franc/Desktop/Uni/HPC/Input/" + nomeDataset + ".csv", "C:/Users/franc/Desktop/Uni/HPC/Input/" + nomeDataset + ".dat");
        return 0;
    }

    TransactionDB dataset = read_binary(nomeDataset + ".dat");
    if (dataset.empty()) {
        // Not fatal: the run simply produces an empty result, but say so explicitly.
        cerr << "Attenzione: dataset vuoto (file '" << nomeDataset << ".dat' mancante o vuoto)" << endl;
    }

    const auto start = steady_clock::now();
    const vector<vector<pair<Itemset, int>>> results = apriori_roaring_bitmap(dataset, minsup,numThreads);
    const auto stop = steady_clock::now();

    const std::chrono::duration<double> duration = stop - start;
    std::cout << "Tempo di esecuzione: " << duration.count() << " secondi" << std::endl;

    const string nomeRisultati = "roaringBitMapResult_" + nomeDataset + ".txt";
    ofstream outfile(nomeRisultati);
    if (!outfile) {
        cerr << "Errore: impossibile creare il file dei risultati '" << nomeRisultati << "'" << endl;
        return 1;
    }
    for (const auto& thread : results) {
        for (const auto& result : thread) {
            outfile << "Frequent itemset: ";
            for (const auto& item : result.first) {
                outfile << item << " ";
            }
            // '\n' instead of endl: no flush for every single result line.
            outfile << "Support: " << result.second << '\n';
        }
    }
    outfile.close();
    return 0;
}


Overwriting code4.cpp


In [None]:
!g++ -fopenmp -o run_me code4.cpp -I CRoaring/include CRoaring/build/src/libroaring.a
!./run_me

Tempo di esecuzione: 0.376231 secondi


/content/code.cpp