Skip to content

Commit

Permalink
use cpp11 std::unordered_map in place of tr1, issue #8
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrisMuir committed Mar 26, 2018
1 parent 7bfd13c commit a6a1110
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 40 deletions.
1 change: 1 addition & 0 deletions src/Makevars
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
CXX_STD = CXX11
1 change: 1 addition & 0 deletions src/Makevars.win
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
CXX_STD = CXX11
15 changes: 3 additions & 12 deletions src/key_collision_merge_funcs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,7 @@ CharacterVector merge_KC_clusters_no_dict(CharacterVector clusters,
// Create an unordered_map using clusters as keys; the values will be the
// indices of each cluster in keys_vect.
std::vector<std::string> cl = as<std::vector<std::string> >(clusters);
unordered_map<std::string, std::vector<int> > keys_vect_map = create_map(
keys_vect,
cl
);
refinr_map keys_vect_map = create_map(keys_vect, cl);

// Iterate over clusters, make mass edits to output.
std::vector<std::string>::iterator clust_end = cl.end();
Expand Down Expand Up @@ -90,14 +87,8 @@ CharacterVector merge_KC_clusters_dict(CharacterVector clusters,
// keys_vect_map will be the indices of each cluster in keys_vect. Values for
// keys_dict_map will be the indices of each cluster in keys_dict.
std::vector<std::string> cl = as<std::vector<std::string> >(clusters);
unordered_map<std::string, std::vector<int> > keys_vect_map = create_map(
keys_vect,
cl
);
unordered_map<std::string, std::vector<int> > keys_dict_map = create_map(
keys_dict,
cl
);
refinr_map keys_vect_map = create_map(keys_vect, cl);
refinr_map keys_dict_map = create_map(keys_dict, cl);

// Iterate over clusters, make mass edits to output.
std::vector<std::string>::iterator clust_end = cl.end();
Expand Down
20 changes: 7 additions & 13 deletions src/n_gram_merge_funcs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,24 @@
#include"refinr_header.h"
using namespace Rcpp;


// Iterate over all clusters, making the mass edits related to each cluster.
// [[Rcpp::export]]
CharacterVector merge_ngram_clusters(List clusters,
CharacterVector clust_unlist,
CharacterVector n_gram_keys,
CharacterVector univect,
CharacterVector vect) {
//int vect_len = vect.size();
CharacterVector output = clone(vect);

// Subset clust_unlist to only keep unique values.
clust_unlist = unique(clust_unlist);

// Create maps
std::vector<std::string> cl_ul = as<std::vector<std::string> >(clust_unlist);
unordered_map<std::string, std::vector<int> > ngram_map = create_map(
n_gram_keys,
cl_ul
);
std::vector<std::string> uni = as<std::vector<std::string> >(univect);
unordered_map<std::string, std::vector<int> > univect_map = create_map(
vect,
uni
);
refinr_map ngram_map = create_map(n_gram_keys, cl_ul);
refinr_map univect_map = create_map(vect, uni);

List::iterator clust_end = clusters.end();
List::iterator iter;
Expand Down Expand Up @@ -95,10 +92,7 @@ List get_ngram_initial_clusters(CharacterVector ngram_keys,
// Create an unordered_map using unigram_dups as keys; the values will be the
// indices of each dup in unigram_keys.
std::vector<std::string> dups = as<std::vector<std::string> >(unigram_dups);
unordered_map<std::string, std::vector<int> > unigram_map = create_map(
unigram_keys,
dups
);
refinr_map unigram_map = create_map(unigram_keys, dups);

// Iterate over unigram_dups. For each value, get the corresponding indices
// from unigram_map, use those indices to subset ngram_keys, save the subset
Expand Down
12 changes: 9 additions & 3 deletions src/refinr_header.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
#include <Rcpp.h>
using namespace Rcpp;

#include <tr1/unordered_map>
using std::tr1::unordered_map;
// [[Rcpp::plugins(cpp11)]]

// Alias for the lookup table shared by the package's C++ sources: each
// std::string key maps to a std::vector<int> of integer positions.
// NOTE(review): <unordered_map> is not included directly in this header;
// presumably it is pulled in through <Rcpp.h> -- consider including it
// explicitly.
using refinr_map = std::unordered_map<std::string, std::vector<int>>;

// utils
LogicalVector equality(CharacterVector lookupvect, String charstring);
LogicalVector cpp_in(CharacterVector x, CharacterVector y);
bool complete_intersect(CharacterVector a, CharacterVector b);
Expand All @@ -12,6 +16,8 @@ CharacterVector cpp_paste_list(List input, std::string collapse_str);
CharacterVector cpp_get_key_dups(CharacterVector keys);
List cpp_flatten_list(List list_obj);
String most_freq_str(CharacterVector x);
unordered_map <std::string, std::vector<int> > create_map(CharacterVector vect, std::vector<std::string> clusters);
refinr_map create_map(CharacterVector vect, std::vector<std::string> clusters);

// key_collision_merge_funcs
CharacterVector merge_KC_clusters_no_dict(CharacterVector clusters, CharacterVector vect, CharacterVector keys_vect);
CharacterVector merge_KC_clusters_dict(CharacterVector clusters, CharacterVector vect, CharacterVector keys_vect, CharacterVector dict, CharacterVector keys_dict);
26 changes: 14 additions & 12 deletions src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,32 @@
#include"refinr_header.h"
using namespace Rcpp;


// Utility helper functions. Some are used within R functions, some within
// C++ functions, and some in both.


// Create an unordered_map with strings as keys and, as values, integer
// vectors holding the positions at which each key occurs in the input vector.
unordered_map <std::string, std::vector<int> > create_map(
CharacterVector vect,
std::vector<std::string> clusters
) {
int clust_len = clusters.size();
int vect_len = vect.size();
refinr_map create_map(CharacterVector terms,
std::vector<std::string> keys) {
int keys_len = keys.size();
int terms_len = terms.size();

// Initialize unordered_map.
unordered_map<std::string, std::vector<int> > clust_map;
for(int i = 0; i < clust_len; ++i) {
clust_map[clusters[i]];
refinr_map out;
for(int i = 0; i < keys_len; ++i) {
out[keys[i]];
}

// Fill values of the map.
for(int i = 0; i < vect_len; ++i) {
clust_map[as<std::string>(vect[i])].push_back(i);
for(int i = 0; i < terms_len; ++i) {
refinr_map::iterator val = out.find(as<std::string>(terms[i]));
if(val != out.end()) {
val->second.push_back(i);
}
}

return(clust_map);
return(out);
}

// Given a CharacterVector, return the string that appears most frequently.
Expand Down

0 comments on commit a6a1110

Please sign in to comment.