Skip to content
Permalink
Browse files

Begin humaniformat update

  • Loading branch information...
ironholds
ironholds committed Apr 22, 2016
1 parent dd50466 commit c624bbabf17c8cb81e92c0b79d18afe6a9c63e0e
@@ -0,0 +1,19 @@
# Sample .travis.yml for R projects

language: r
warnings_are_errors: false
sudo: required

env:
global:
- CRAN: http://cran.rstudio.com

r_packages:
- rmarkdown
- testthat
- knitr
- Rcpp
notifications:
email:
on_success: change
on_failure: change
@@ -1,20 +1,23 @@
Package: humaniformat
Title: A Parser for Human Names
Version: 0.5.0
Author: Oliver Keyes
Version: 0.6.0
Author: Oliver Keyes [aut, cre]
Maintainer: Oliver Keyes <ironholds@gmail.com>
Description: Human names are complicated and nonstandard things. Humaniformat, which is based on Anthony Ettinger's 'humanparser'
project (https://github.com/chovy/humanparser) provides functions for parsing human names,
making a best-guess attempt to distinguish sub-components such as prefixes, suffixes, middle names and salutations.
Description: Human names are complicated and nonstandard things. Humaniformat,
which is based on Anthony Ettinger's 'humanparser' project
(https://github.com/chovy/humanparser), provides functions for parsing human
names, making a best-guess attempt to distinguish sub-components such as
prefixes, suffixes, middle names and salutations.
License: MIT + file LICENSE
LazyData: true
URL: https://github.com/ironholds/humaniformat/
BugReports: https://github.com/ironholds/humaniformat/issues
Suggests:
Suggests:
testthat,
knitr
LinkingTo: Rcpp
Imports:
Rcpp,
methods
Imports:
Rcpp,
methods
VignetteBuilder: knitr
RoxygenNote: 5.0.1
@@ -1,4 +1,4 @@
# Generated by roxygen2 (4.1.1): do not edit by hand
# Generated by roxygen2: do not edit by hand

export("first_name<-")
export("last_name<-")
@@ -1,16 +1,15 @@
## humaniformat
A human name parser for R
## A human name parser for R

__Author:__ Oliver Keyes <br/>
__License:__ [MIT](http://opensource.org/licenses/MIT)<br/>
__Status:__ Stable

![downloads](http://cranlogs.r-pkg.org/badges/grand-total/humaniformat)
[![Travis-CI Build Status](https://travis-ci.org/Ironholds/humaniformat.svg?branch=master)](https://travis-ci.org/Ironholds/humaniformat) ![downloads](http://cranlogs.r-pkg.org/badges/grand-total/humaniformat)

### Description
`humaniformat` (`humaniform` + `format`) is a human names parser for R. With it, you can parse names, distinguishing salutations, suffixes, and first, middle and last names. `humaniformat` recognises compound last names (and preserves them) from a wide range of cultures, although the name format itself is somewhat Western-centric (it assumes, for example, that first name comes before last name, which is not always the standard).

Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.
Please note that this project is released with a [Contributor Code of Conduct](https://github.com/Ironholds/humaniformat/blob/master/CONDUCT.md). By participating in this project you agree to abide by its terms.

### Installation

@@ -24,4 +23,4 @@ To get the development version:

### Dependencies
* R. Doy.
* [Rcpp](http://cran.rstudio.com/web/packages/Rcpp/)
* [Rcpp](https://cran.r-project.org/package=Rcpp)

Some generated files are not rendered by default. Learn more.

Some generated files are not rendered by default. Learn more.

Some generated files are not rendered by default. Learn more.

Some generated files are not rendered by default. Learn more.

Some generated files are not rendered by default. Learn more.

Some generated files are not rendered by default. Learn more.

Some generated files are not rendered by default. Learn more.

Some generated files are not rendered by default. Learn more.

Some generated files are not rendered by default. Learn more.

@@ -6,57 +6,57 @@
using namespace Rcpp;

// parse_names
// Export wrapper for parse_names(): converts the incoming SEXP into the
// `names` argument, calls parse_names(names), and returns the wrapped result.
// NOTE(review): this span looks like a rendered diff with old and new lines
// interleaved -- the std::vector < std::string > lines appear to be the
// removed versions and the CharacterVector lines their replacements; confirm
// against the real file before relying on either.
DataFrame parse_names(std::vector < std::string > names);
DataFrame parse_names(CharacterVector names);
RcppExport SEXP humaniformat_parse_names(SEXP namesSEXP) {
BEGIN_RCPP
Rcpp::RObject __result;
Rcpp::RNGScope __rngScope;
// Convert the raw SEXP argument into the type parse_names() expects.
Rcpp::traits::input_parameter< std::vector < std::string > >::type names(namesSEXP);
Rcpp::traits::input_parameter< CharacterVector >::type names(namesSEXP);
__result = Rcpp::wrap(parse_names(names));
return __result;
END_RCPP
}
// format_reverse
// Export wrapper for format_reverse(): converts the incoming SEXP into the
// `names` argument, calls format_reverse(names), and returns the wrapped
// result.
// NOTE(review): this span looks like a rendered diff with old and new lines
// interleaved -- the std::vector < std::string > lines appear to be the
// removed versions and the CharacterVector lines their replacements; confirm
// against the real file before relying on either.
std::vector < std::string > format_reverse(std::vector < std::string > names);
CharacterVector format_reverse(CharacterVector names);
RcppExport SEXP humaniformat_format_reverse(SEXP namesSEXP) {
BEGIN_RCPP
Rcpp::RObject __result;
Rcpp::RNGScope __rngScope;
// Convert the raw SEXP argument into the type format_reverse() expects.
Rcpp::traits::input_parameter< std::vector < std::string > >::type names(namesSEXP);
Rcpp::traits::input_parameter< CharacterVector >::type names(namesSEXP);
__result = Rcpp::wrap(format_reverse(names));
return __result;
END_RCPP
}
// format_period
// Export wrapper for format_period(): converts the incoming SEXP into the
// `names` argument, calls format_period(names), and returns the wrapped
// result.
// NOTE(review): this span looks like a rendered diff with old and new lines
// interleaved -- the std::vector < std::string > lines appear to be the
// removed versions and the CharacterVector lines their replacements; confirm
// against the real file before relying on either.
std::vector < std::string > format_period(std::vector < std::string > names);
CharacterVector format_period(CharacterVector names);
RcppExport SEXP humaniformat_format_period(SEXP namesSEXP) {
BEGIN_RCPP
Rcpp::RObject __result;
Rcpp::RNGScope __rngScope;
// Convert the raw SEXP argument into the type format_period() expects.
Rcpp::traits::input_parameter< std::vector < std::string > >::type names(namesSEXP);
Rcpp::traits::input_parameter< CharacterVector >::type names(namesSEXP);
__result = Rcpp::wrap(format_period(names));
return __result;
END_RCPP
}
// get_
// Export wrapper for get_(): converts the two incoming SEXPs into the `names`
// and `element` arguments, calls get_(names, element), and returns the
// wrapped result.
// NOTE(review): this span looks like a rendered diff with old and new lines
// interleaved -- the std::vector < std::string > lines appear to be the
// removed versions and the CharacterVector lines their replacements; confirm
// against the real file before relying on either.
std::vector < std::string > get_(std::vector < std::string > names, int element);
CharacterVector get_(CharacterVector names, int element);
RcppExport SEXP humaniformat_get_(SEXP namesSEXP, SEXP elementSEXP) {
BEGIN_RCPP
Rcpp::RObject __result;
Rcpp::RNGScope __rngScope;
// Convert the raw SEXP arguments into the types get_() expects.
Rcpp::traits::input_parameter< std::vector < std::string > >::type names(namesSEXP);
Rcpp::traits::input_parameter< CharacterVector >::type names(namesSEXP);
Rcpp::traits::input_parameter< int >::type element(elementSEXP);
__result = Rcpp::wrap(get_(names, element));
return __result;
END_RCPP
}
// set_
std::vector < std::string > set_(std::vector < std::string > names, int element, std::string replacement);
CharacterVector set_(CharacterVector names, int element, std::string replacement);
RcppExport SEXP humaniformat_set_(SEXP namesSEXP, SEXP elementSEXP, SEXP replacementSEXP) {
BEGIN_RCPP
Rcpp::RObject __result;
Rcpp::RNGScope __rngScope;
Rcpp::traits::input_parameter< std::vector < std::string > >::type names(namesSEXP);
Rcpp::traits::input_parameter< CharacterVector >::type names(namesSEXP);
Rcpp::traits::input_parameter< int >::type element(elementSEXP);
Rcpp::traits::input_parameter< std::string >::type replacement(replacementSEXP);
__result = Rcpp::wrap(set_(names, element, replacement));
@@ -64,26 +64,40 @@ std::string human_format::period_format(std::string name){
return output;
}

// Vectorised wrapper around reverse_format().
// The CharacterVector version allocates an output vector the same length as
// `names`, stores NA_STRING for every NA input and reverse_format() of every
// other input, and returns that new vector. The std::vector version instead
// overwrites `names` element by element and returns it.
// NOTE(review): this span looks like a rendered diff with old and new lines
// interleaved -- the std::vector < std::string > lines (and the in-place
// `names[i] = ...` / `return names;` lines) appear to be the removed version;
// confirm against the real file.
std::vector < std::string > human_format::reverse_format_vector(std::vector < std::string > names){
CharacterVector human_format::reverse_format_vector(CharacterVector names){

for(unsigned int i = 0; i < names.size(); i++){
unsigned int input_size = names.size();
CharacterVector output(input_size);

for(unsigned int i = 0; i < input_size; i++){
// Poll for a user interrupt on the first element and every 10,000th after it.
if((i % 10000) == 0){
Rcpp::checkUserInterrupt();
}
names[i] = reverse_format(names[i]);
// NA inputs are never handed to reverse_format(); they stay NA in the output.
if(names[i] == NA_STRING){
output[i] = NA_STRING;
} else {
output[i] = reverse_format(Rcpp::as<std::string>(names[i]));
}
}

return names;
return output;
}

// Vectorised wrapper around period_format().
// The CharacterVector version allocates an output vector the same length as
// `names`, stores NA_STRING for every NA input and period_format() of every
// other input, and returns that new vector. The std::vector version instead
// overwrites `names` element by element and returns it.
// NOTE(review): this span looks like a rendered diff with old and new lines
// interleaved -- the std::vector < std::string > lines (and the in-place
// `names[i] = ...` / `return names;` lines) appear to be the removed version;
// confirm against the real file.
std::vector < std::string > human_format::period_format_vector(std::vector < std::string > names){
CharacterVector human_format::period_format_vector(CharacterVector names){

for(unsigned int i = 0; i < names.size(); i++){
unsigned int input_size = names.size();
CharacterVector output(input_size);

for(unsigned int i = 0; i < input_size; i++){
// Poll for a user interrupt on the first element and every 10,000th after it.
if((i % 10000) == 0){
Rcpp::checkUserInterrupt();
}
names[i] = period_format(names[i]);
// NA inputs are never handed to period_format(); they stay NA in the output.
if(names[i] == NA_STRING){
output[i] = NA_STRING;
} else {
output[i] = period_format(Rcpp::as<std::string>(names[i]));
}
}

return names;
return output;
}
@@ -41,7 +41,7 @@ class human_format: public human_parse {
*
* @return a string vector containing the reformatted names.
*/
std::vector < std::string > reverse_format_vector(std::vector < std::string > names);
CharacterVector reverse_format_vector(CharacterVector names);

/**
* A function for taking a vector of names in the form (initial.initial. lastname)
@@ -52,7 +52,7 @@ class human_format: public human_parse {
* @return a string vector containing the reformatted names, where each period-separated
* element before the first space is now space-separated.
*/
std::vector < std::string > period_format_vector(std::vector < std::string > names);
CharacterVector period_format_vector(CharacterVector names);

};

@@ -1,19 +1,21 @@
#include "human_getset.h"

// Parses `name` with parse_single() and returns the component stored at index
// `element` of the parsed result. No bounds check on `element` is performed
// here.
// NOTE(review): this span looks like a rendered diff with old and new lines
// interleaved -- the std::string / std::vector < std::string > lines appear
// to be the removed version and the String / CharacterVector lines their
// replacements; confirm against the real file.
std::string human_getset::get_single(std::string name, int element){
std::vector < std::string > split_name = parse_single(name);
String human_getset::get_single(std::string name, int element){
CharacterVector split_name = parse_single(name);
return split_name[element];
}

std::string human_getset::set_single(std::string name, int element, std::string replacement){
std::vector < std::string > split_name = parse_single(name);
CharacterVector split_name = parse_single(name);
split_name[element] = replacement;
std::string output;

for(unsigned int i = 0; i < split_name.size(); i++){
output.append(split_name[i]);
if(i < (split_name.size() - 1) && split_name[i] != ""){
output.append(" ");
if(split_name[i] != NA_STRING){
output.append(split_name[i]);
if(i < (split_name.size() - 1) && split_name[i] != ""){
output.append(" ");
}
}
}

@@ -25,21 +27,32 @@ std::string human_getset::set_single(std::string name, int element, std::string
return output;
}

// Vectorised wrapper around get_single().
// The CharacterVector version allocates an output vector the same length as
// `names`, stores NA_STRING for every NA input and
// get_single(<name>, element) for every other input, and returns that new
// vector. The std::vector version instead overwrites `names` in place and
// returns it.
// NOTE(review): this span looks like a rendered diff with old and new lines
// interleaved -- the std::vector < std::string > signature and the in-place
// `names[i] = ...` / `return names;` lines appear to be the removed version;
// confirm against the real file.
std::vector < std::string > human_getset::get_vector(std::vector < std::string > names, int element){
CharacterVector human_getset::get_vector(CharacterVector names, int element){

CharacterVector output(names.size());
// NOTE(review): `test` is declared but never used in this function; it looks
// like leftover debugging and can probably be removed.
String test;
for(unsigned int i = 0; i < names.size(); i++){
names[i] = get_single(names[i], element);
// NA inputs are never handed to get_single(); they stay NA in the output.
if(names[i] == NA_STRING){
output[i] = NA_STRING;
} else {
output[i] = get_single(Rcpp::as<std::string>(names[i]), element);
}
}

return names;
return output;
}

// Vectorised wrapper around set_single().
// The CharacterVector version allocates an output vector the same length as
// `names`, stores NA_STRING for every NA input and
// set_single(<name>, element, replacement) for every other input, and returns
// that new vector. The std::vector version instead overwrites `names` in
// place and returns it.
// NOTE(review): this span looks like a rendered diff with old and new lines
// interleaved -- the two-line std::vector < std::string > signature and the
// in-place `names[i] = ...` / `return names;` lines appear to be the removed
// version; confirm against the real file.
std::vector < std::string > human_getset::set_vector(std::vector < std::string > names, int element,
std::string replacement){
CharacterVector human_getset::set_vector(CharacterVector names, int element, std::string replacement){

CharacterVector output(names.size());

for(unsigned int i = 0; i < names.size(); i++){
names[i] = set_single(names[i], element, replacement);
// NA inputs are never handed to set_single(); they stay NA in the output.
if(names[i] == NA_STRING){
output[i] = NA_STRING;
} else {
output[i] = set_single(Rcpp::as<std::string>(names[i]), element, replacement);
}
}

return names;
return output;
}

0 comments on commit c624bba

Please sign in to comment.
You can’t perform that action at this time.